diff --git a/.gitignore b/.gitignore
index e0b82c1..ddbb497 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,12 +19,12 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
-data/*
+
# Virtual Environment
venv/
env/
ENV/
-.venv
+.venv/
# IDE
.vscode/
@@ -34,51 +34,69 @@ ENV/
*~
.DS_Store
+# Claude Code
+.claude/
+
# API Keys - 중요!
.env
-.env.local
-.env.development
-.env.test
-.env.production
-
+.env.*
apis/gemini_keys.yaml
-!apis/gemini_keys.yaml.template
+apis/*.yaml
+!apis/*-example.yaml
+!apis/*.template.yaml
# Logs
*.log
+logs/
# Jupyter Notebook
-.ipynb_checkpoints
+.ipynb_checkpoints/
# pytest
.pytest_cache/
.coverage
+htmlcov/
# MyPy
.mypy_cache/
.dmypy.json
dmypy.json
-# database
+# Data - 원본 데이터
+data/
+
+# Output - 생성된 결과물
+output/
+output_*/
+I_origin_*/
+
+# Temp - 임시 파일
+temp/
+
+# Archives
+*.zip
+*.tar.gz
+*.rar
+
+# Generated JSON (except input templates)
+pipeline_output*.json
+qa_difficulty_analysis_*.json
+qa_for_review_*.json
+eval_results_*.json
+
+# Keep input templates
+!test_*_input.json
+
+# Database/Token
info/
token.json
-*.json
-test_input.json
-# env
+# Docs (if generated)
.bemad/
-docs/
-pipeline_ui/backend/checkpoints/*
-pipeline_ui/backend/output/*
-pipeline_ui/backend/uploads/*
-# Frontend (Node.js)
-pipeline_ui/frontend/node_modules/*
+# Pipeline UI
+pipeline_ui/backend/checkpoints/
+pipeline_ui/backend/output/
+pipeline_ui/backend/uploads/
+pipeline_ui/frontend/node_modules/
pipeline_ui/frontend/package-lock.json
-
-
-I_origin_0/*
-I_origin_1/*
-I_origin_2/*
-
-output/*
\ No newline at end of file
diff --git a/capture_html_images.py b/capture_html_images.py
new file mode 100644
index 0000000..4491bcb
--- /dev/null
+++ b/capture_html_images.py
@@ -0,0 +1,127 @@
+"""
+Capture HTML files from output_* directories as images using Playwright.
+"""
+import argparse
+import asyncio
+from pathlib import Path
+from typing import List
+
+from playwright.async_api import async_playwright
+
+
+async def capture_html_file_async(
+    html_path: Path,
+    output_path: Path,
+    width: int = 800,
+) -> None:
+    """Render one HTML file in headless Chromium and save it as an image.
+
+    Args:
+        html_path: HTML file to read (decoded as UTF-8).
+        output_path: Destination path handed to Playwright's screenshot call.
+        width: Viewport width in pixels. The initial viewport height is 600
+            and the screenshot is taken with ``full_page=True``.
+    """
+    markup = html_path.read_text(encoding="utf-8")
+    viewport = {"width": width, "height": 600}
+
+    async with async_playwright() as playwright:
+        chromium = await playwright.chromium.launch(headless=True)
+        try:
+            tab = await chromium.new_page(viewport=viewport)
+            await tab.set_content(markup)
+            await tab.screenshot(path=output_path, full_page=True)
+        finally:
+            # Shut the browser down even when rendering fails.
+            await chromium.close()
+
+
+async def capture_batch_async(
+    html_files: List[Path],
+    output_dir: Path,
+    width: int = 800,
+    force: bool = False,
+) -> None:
+    """Capture multiple HTML files, reusing a single browser instance.
+
+    Args:
+        html_files: HTML files to render.
+        output_dir: Directory receiving ``<stem>.png`` for every HTML file.
+        width: Viewport width in pixels (initial height is 600, full page
+            screenshots are taken).
+        force: When True, existing PNG files are re-rendered and overwritten
+            instead of being skipped.
+
+    A failure on one file is printed and does not stop the batch.
+    """
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        try:
+            for html_path in html_files:
+                output_path = output_dir / f"{html_path.stem}.png"
+                if output_path.exists() and not force:
+                    print(f" [SKIP] {output_path.name} already exists")
+                    continue
+
+                try:
+                    html_content = html_path.read_text(encoding="utf-8")
+                    page = await browser.new_page(viewport={"width": width, "height": 600})
+                    try:
+                        await page.set_content(html_content)
+                        await page.screenshot(path=output_path, full_page=True)
+                    finally:
+                        # Close the page even when rendering fails so pages
+                        # do not pile up in the shared browser.
+                        await page.close()
+                    print(f" [OK] {html_path.name} -> {output_path.name}")
+                except Exception as e:
+                    # Best effort: report the failure and move to the next file.
+                    print(f" [ERROR] {html_path.name}: {e}")
+        finally:
+            await browser.close()
+
+
+def main():
+    """Parse CLI options and capture pending HTML files of each output directory.
+
+    For every selected ``output_*`` directory, files in ``html/`` are rendered
+    to ``images/<stem>.png``. Existing images are kept unless ``--force`` is
+    given, in which case they are rendered again and overwritten.
+    """
+    parser = argparse.ArgumentParser(description="Capture HTML files as images")
+    parser.add_argument(
+        "--output-dirs",
+        nargs="+",
+        default=None,
+        help="Specific output directories to process (e.g., output_academic output_finance)",
+    )
+    parser.add_argument(
+        "--width",
+        type=int,
+        default=800,
+        help="Viewport width for rendering (default: 800)",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Overwrite existing images",
+    )
+    args = parser.parse_args()
+
+    base_dir = Path(__file__).parent
+
+    # Explicitly requested directories are resolved against the script
+    # location; otherwise every output_* directory next to the script is used.
+    if args.output_dirs:
+        candidates = [base_dir / d for d in args.output_dirs]
+    else:
+        candidates = sorted(base_dir.glob("output_*"))
+
+    output_dirs = []
+    for candidate in candidates:
+        if candidate.is_dir():
+            output_dirs.append(candidate)
+        elif args.output_dirs:
+            # Tell the user about a mistyped directory instead of dropping it silently.
+            print(f"[SKIP] {candidate.name}: not a directory")
+
+    if not output_dirs:
+        print("No output_* directories found.")
+        return
+
+    print(f"Found {len(output_dirs)} output directories to process")
+
+    for output_dir in output_dirs:
+        html_dir = output_dir / "html"
+        if not html_dir.exists():
+            print(f"\n[SKIP] {output_dir.name}: no html/ subdirectory")
+            continue
+
+        # Create images directory
+        images_dir = output_dir / "images"
+        images_dir.mkdir(exist_ok=True)
+
+        html_files = sorted(html_dir.glob("*.html"))
+        if not html_files:
+            print(f"\n[SKIP] {output_dir.name}: no HTML files found")
+            continue
+
+        # Filter out already processed files unless --force
+        if not args.force:
+            html_files = [
+                f for f in html_files
+                if not (images_dir / f"{f.stem}.png").exists()
+            ]
+
+        if not html_files:
+            print(f"\n[SKIP] {output_dir.name}: all files already processed")
+            continue
+
+        print(f"\n[Processing] {output_dir.name}: {len(html_files)} HTML files")
+        # Forward --force so existing images are actually re-rendered.
+        asyncio.run(
+            capture_batch_async(html_files, images_dir, args.width, force=args.force)
+        )
+
+    print("\nDone!")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/eval/__init__.py b/eval/__init__.py
index 65cc49f..4bdd2ee 100644
--- a/eval/__init__.py
+++ b/eval/__init__.py
@@ -28,6 +28,13 @@
evaluate_predictions,
run_evaluation,
)
+from .evaluate_vllm import (
+ EvalConfig,
+ load_qa_from_pipeline_output,
+ evaluate_domain,
+ evaluate_all_domains,
+ DOMAIN_DIRS,
+)
__all__ = [
# Dataset
@@ -50,4 +57,10 @@
# Evaluate
"evaluate_predictions",
"run_evaluation",
+ # vLLM Evaluate
+ "EvalConfig",
+ "load_qa_from_pipeline_output",
+ "evaluate_domain",
+ "evaluate_all_domains",
+ "DOMAIN_DIRS",
]
diff --git a/eval/evaluate_vllm.py b/eval/evaluate_vllm.py
new file mode 100644
index 0000000..aa9740d
--- /dev/null
+++ b/eval/evaluate_vllm.py
@@ -0,0 +1,648 @@
+#!/usr/bin/env python3
+"""
+vLLM 서버를 사용한 Table QA 평가 스크립트.
+
+output_* 디렉토리의 HTML 테이블 이미지와 QA 데이터를 사용하여
+멀티모달 모델의 Table QA 성능을 평가합니다.
+
+Usage:
+ # 단일 도메인 평가
+ python -m eval.evaluate_vllm --domain public --vllm-url http://localhost:8000/v1
+
+ # 모든 도메인 평가
+ python -m eval.evaluate_vllm --all-domains --vllm-url http://localhost:8000/v1
+
+ # 특정 모델 사용
+ python -m eval.evaluate_vllm --domain business --model Qwen/Qwen2-VL-7B-Instruct
+
+ # LLM-as-Judge 포함
+ python -m eval.evaluate_vllm --domain finance --use-judge --judge-model gpt-4o
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+
+# Add the project root to sys.path
+project_root = Path(__file__).parent.parent
+if str(project_root) not in sys.path:
+ sys.path.insert(0, str(project_root))
+
+from eval.dataset import QAItem, EvalDataset
+from eval.inference import VLLMClient, InferenceRequest, InferenceResponse, run_inference
+from eval.metrics import compute_metrics, aggregate_metrics, EvalResult, AggregatedMetrics
+from eval.evaluate import evaluate_predictions, generate_report, print_report
+from eval.llm_judge import create_judge_client
+
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+# Mapping from domain name to its output directory
+DOMAIN_DIRS = {
+ "academic": "output_academic",
+ "business": "output_business",
+ "finance": "output_finance",
+ "medical": "output_medical",
+ "public": "output_public",
+}
+
+# Default prompt template
+DEFAULT_PROMPT_TEMPLATE = """당신은 테이블 이미지를 분석하여 질문에 답하는 AI 어시스턴트입니다.
+주어진 테이블 이미지를 주의 깊게 분석한 후, 질문에 대해 정확하고 간결하게 답변해주세요.
+
+질문: {question}
+
+답변:"""
+
+# Prompt template used when the QA item carries a context
+CONTEXT_PROMPT_TEMPLATE = """당신은 테이블 이미지를 분석하여 질문에 답하는 AI 어시스턴트입니다.
+주어진 테이블 이미지와 아래 문맥을 함께 고려하여 질문에 정확하고 간결하게 답변해주세요.
+
+[문맥]
+{context}
+
+질문: {question}
+
+답변:"""
+
+
+@dataclass
+class EvalConfig:
+ """Evaluation settings for one vLLM Table QA run."""
+ # Domain key; evaluate_domain looks it up in DOMAIN_DIRS.
+ domain: str
+ # Connection and generation settings passed to VLLMClient.
+ vllm_url: str = "http://localhost:8000/v1"
+ model: str = "default"
+ max_tokens: int = 512
+ temperature: float = 0.0
+ max_concurrent: int = 10
+ timeout: float = 120.0
+ # LLM-as-judge options; forwarded to create_judge_client when use_judge is set.
+ use_judge: bool = False
+ judge_provider: str = "openai"
+ judge_model: Optional[str] = None
+ judge_api_key: Optional[str] = None
+ # Directory for result files; when None, evaluate_domain passes no output path to run_inference.
+ output_dir: Optional[Path] = None
+ limit: Optional[int] = None # maximum number of samples to evaluate (for debugging)
+ qa_types: Optional[List[str]] = None # evaluate only these QA types
+
+
+def find_table_images(
+    output_dir: Path,
+    pair_id: str,
+    image_paths: List[str],
+) -> List[Path]:
+    """Locate the table images that belong to one QA pair.
+
+    Captured screenshots are looked up in ``output_dir/images``:
+
+    1. files named ``<pair_id>_table_<N>.png``;
+    2. otherwise any ``<pair_id>*.png`` (this also covers every image whose
+       name mirrors a ``<pair_id>*.html`` file in ``output_dir/html``).
+
+    When no captured image exists, the original ``image_paths`` are used as a
+    fallback, tried as given and then relative to the project root.
+
+    Args:
+        output_dir: An ``output_*`` directory.
+        pair_id: QA pair identifier, e.g. ``"B_origin_3_3_0"``.
+        image_paths: Original image paths recorded for the pair.
+
+    Returns:
+        De-duplicated image paths. Captured images are ordered naturally by
+        file name, so ``..._table_2.png`` comes before ``..._table_10.png``.
+        Fallback paths keep the order of ``image_paths``.
+    """
+    import glob
+    import re
+
+    def _natural_key(path: Path):
+        # re.split with a capturing group alternates text/digit chunks and
+        # always starts with text, so str and int never meet at one index.
+        return [
+            int(chunk) if chunk.isdigit() else chunk
+            for chunk in re.split(r"(\d+)", path.name)
+        ]
+
+    images_dir = output_dir / "images"
+    # Escape glob metacharacters so a pair_id such as "a[1]" matches literally.
+    safe_id = glob.escape(pair_id)
+
+    captured: List[Path] = []
+    if images_dir.exists():
+        captured = list(images_dir.glob(f"{safe_id}_table_*.png"))
+        if not captured:
+            captured = list(images_dir.glob(f"{safe_id}*.png"))
+
+    if captured:
+        return sorted(set(captured), key=_natural_key)
+
+    # Fallback: original image paths, as given or relative to the project root.
+    resolved: List[Path] = []
+    for orig_path in image_paths:
+        candidate = Path(orig_path)
+        if not candidate.exists():
+            candidate = project_root / orig_path
+            if not candidate.exists():
+                continue
+        resolved.append(candidate)
+
+    # dict.fromkeys removes duplicates while keeping the caller's order.
+    return list(dict.fromkeys(resolved))
+
+
+def load_qa_from_pipeline_output(
+    output_dir: Path,
+    limit: Optional[int] = None,
+    qa_types: Optional[List[str]] = None,
+) -> EvalDataset:
+    """Build an EvalDataset from ``output_dir/pipeline_output.json``.
+
+    Args:
+        output_dir: An ``output_*`` directory.
+        limit: Stop after this many QA items have been loaded.
+        qa_types: When given, only QA items whose ``type`` is listed are kept.
+
+    Returns:
+        The loaded dataset, or an empty ``EvalDataset()`` when the JSON file
+        does not exist.
+    """
+    source_path = output_dir / "pipeline_output.json"
+    if not source_path.exists():
+        logger.warning(f"pipeline_output.json not found in {output_dir}")
+        return EvalDataset()
+
+    with open(source_path, "r", encoding="utf-8") as fh:
+        entries = json.load(fh)
+
+    collected = []
+    missing_image_skips = 0
+    type_filter_skips = 0
+
+    for entry in entries:
+        pair_id = entry.get("pair_id", entry.get("name", "unknown"))
+        raw_paths = entry.get("image_paths", [])
+        qa_list = entry.get("qa_results", [])
+
+        # Captured table images are resolved once per entry.
+        captured = find_table_images(output_dir, pair_id, raw_paths)
+
+        for position, qa in enumerate(qa_list):
+            kind = qa.get("type", "unknown")
+
+            # Type filter comes first, so filtered items never count as image skips.
+            if qa_types and kind not in qa_types:
+                type_filter_skips += 1
+                continue
+
+            if captured:
+                paths_for_item = [str(path) for path in captured]
+            elif raw_paths:
+                # Fall back to the recorded original paths.
+                paths_for_item = raw_paths
+            else:
+                missing_image_skips += 1
+                continue
+
+            collected.append(
+                QAItem(
+                    id=f"{pair_id}_{position}",
+                    question=qa.get("question", ""),
+                    answer=qa.get("answer", ""),
+                    qa_type=kind,
+                    image_paths=paths_for_item,
+                    reasoning_annotation=qa.get("reasoning_annotation"),
+                    context=qa.get("context"),
+                    source_file=str(source_path),
+                )
+            )
+
+            if limit and len(collected) >= limit:
+                break
+
+        # Leave the outer loop as well once the limit has been reached.
+        if limit and len(collected) >= limit:
+            break
+
+    if missing_image_skips:
+        logger.warning(f"Skipped {missing_image_skips} QA items without images")
+    if type_filter_skips:
+        logger.info(f"Skipped {type_filter_skips} QA items due to type filter")
+
+    summary = {
+        "source": str(source_path),
+        "domain": output_dir.name,
+        "total_entries": len(entries),
+        "loaded_qa_count": len(collected),
+    }
+    return EvalDataset(items=collected, metadata=summary)
+
+
+def create_inference_requests(
+    dataset: EvalDataset,
+    prompt_template: Optional[str] = None,
+) -> List[InferenceRequest]:
+    """Turn every dataset item into an InferenceRequest.
+
+    Args:
+        dataset: Evaluation dataset to iterate over.
+        prompt_template: Optional ``str.format`` template. It may use
+            ``{question}`` and ``{context}``. When omitted,
+            CONTEXT_PROMPT_TEMPLATE is used for items with a context and
+            DEFAULT_PROMPT_TEMPLATE otherwise.
+
+    Returns:
+        One request per dataset item, in iteration order.
+    """
+    requests = []
+
+    for item in dataset:
+        if prompt_template:
+            template = prompt_template
+        elif item.context:
+            template = CONTEXT_PROMPT_TEMPLATE
+        else:
+            template = DEFAULT_PROMPT_TEMPLATE
+
+        # Always supply both fields: str.format ignores unused keyword
+        # arguments, and a custom template containing {context} no longer
+        # raises KeyError for items without a context.
+        prompt = template.format(
+            question=item.question,
+            context=item.context or "",
+        )
+
+        requests.append(
+            InferenceRequest(
+                id=item.id,
+                prompt=prompt,
+                ground_truth=item.answer,
+                qa_type=item.qa_type,
+                image_paths=item.image_paths,
+            )
+        )
+
+    return requests
+
+
+async def evaluate_domain(
+    config: EvalConfig,
+) -> tuple[List[EvalResult], AggregatedMetrics, Dict[str, Any]]:
+    """Run inference and scoring for a single domain.
+
+    Args:
+        config: Evaluation settings; ``config.domain`` must be a key of
+            DOMAIN_DIRS.
+
+    Returns:
+        ``(per-item results, aggregated metrics, metadata)``. When no QA item
+        could be loaded the result is ``([], AggregatedMetrics(), {})``.
+
+    Raises:
+        ValueError: ``config.domain`` is not a known domain.
+        FileNotFoundError: The domain's output directory does not exist.
+    """
+    dir_name = DOMAIN_DIRS.get(config.domain)
+    if not dir_name:
+        raise ValueError(f"Unknown domain: {config.domain}. Available: {list(DOMAIN_DIRS.keys())}")
+
+    output_dir = project_root / dir_name
+    if not output_dir.exists():
+        raise FileNotFoundError(f"Output directory not found: {output_dir}")
+
+    logger.info(f"Evaluating domain: {config.domain}")
+    logger.info(f"Output directory: {output_dir}")
+
+    # Step 1: load the dataset.
+    dataset = load_qa_from_pipeline_output(
+        output_dir,
+        limit=config.limit,
+        qa_types=config.qa_types,
+    )
+    if len(dataset) == 0:
+        logger.error("No QA items loaded. Check if pipeline_output.json exists and contains valid data.")
+        return [], AggregatedMetrics(), {}
+
+    logger.info(f"Loaded {len(dataset)} QA items")
+    logger.info(f"Type distribution: {dataset.get_type_distribution()}")
+
+    # Step 2: build the inference requests.
+    requests = create_inference_requests(dataset)
+
+    # Step 3: create the vLLM client.
+    client = VLLMClient(
+        base_url=config.vllm_url,
+        model=config.model,
+        max_tokens=config.max_tokens,
+        temperature=config.temperature,
+        timeout=config.timeout,
+        max_concurrent=config.max_concurrent,
+    )
+
+    # Step 4: run inference, passing an output path only when a directory is configured.
+    if config.output_dir:
+        config.output_dir.mkdir(parents=True, exist_ok=True)
+        inference_output = config.output_dir / f"{config.domain}_inference.json"
+    else:
+        inference_output = None
+
+    logger.info(f"Running inference on {len(requests)} requests...")
+    responses = await run_inference(client, requests, output_path=inference_output)
+
+    # Step 5: score. Questions are matched to responses by position.
+    questions = [item.question for item in dataset.items]
+    predictions = []
+    for index, response in enumerate(responses):
+        predictions.append(
+            {
+                "id": response.id,
+                "prediction": response.prediction,
+                "ground_truth": response.ground_truth,
+                "qa_type": response.qa_type,
+                "question": questions[index] if index < len(questions) else "",
+            }
+        )
+
+    # The judge client is created only when requested.
+    judge_client = (
+        create_judge_client(
+            provider=config.judge_provider,
+            model=config.judge_model,
+            api_key=config.judge_api_key,
+        )
+        if config.use_judge
+        else None
+    )
+
+    results, aggregated = await evaluate_predictions(
+        predictions,
+        use_judge=config.use_judge,
+        judge_client=judge_client,
+        questions=questions,
+    )
+
+    metadata = {
+        "domain": config.domain,
+        "model": config.model,
+        "vllm_url": config.vllm_url,
+        "total_items": len(dataset),
+        "type_distribution": dataset.get_type_distribution(),
+        "timestamp": datetime.now().isoformat(),
+    }
+
+    return results, aggregated, metadata
+
+
+async def evaluate_all_domains(
+    config: EvalConfig,
+) -> Dict[str, tuple[List[EvalResult], AggregatedMetrics, Dict[str, Any]]]:
+    """Run evaluate_domain for every domain in DOMAIN_DIRS.
+
+    Args:
+        config: Base settings; its ``domain`` field is replaced per domain.
+
+    Returns:
+        Mapping of domain name to ``(results, aggregated, metadata)``. A
+        domain that fails is logged and recorded as
+        ``([], AggregatedMetrics(), {"error": <message>})`` so the remaining
+        domains are still evaluated.
+    """
+    from dataclasses import replace
+
+    all_results = {}
+
+    for domain in DOMAIN_DIRS:
+        # replace() copies every field, so settings added to EvalConfig
+        # later are carried over without touching this function.
+        domain_config = replace(config, domain=domain)
+
+        try:
+            results, aggregated, metadata = await evaluate_domain(domain_config)
+            all_results[domain] = (results, aggregated, metadata)
+            print_report(aggregated)
+        except Exception as e:
+            # Best effort across domains: record the failure and continue.
+            logger.error(f"Failed to evaluate domain {domain}: {e}")
+            all_results[domain] = ([], AggregatedMetrics(), {"error": str(e)})
+
+    return all_results
+
+
+def save_results(
+    results: List[EvalResult],
+    aggregated: AggregatedMetrics,
+    metadata: Dict[str, Any],
+    output_dir: Path,
+    domain: str,
+) -> None:
+    """Write the full report and a compact summary for one domain.
+
+    Two UTF-8 JSON files are created in ``output_dir`` (which is created if
+    missing): ``<domain>_evaluation_report.json`` from generate_report and
+    ``<domain>_summary.json`` with the headline metrics.
+    """
+
+    def _write_json(target: Path, payload: Any) -> None:
+        # Shared writer: human-readable JSON with non-ASCII text kept as-is.
+        with open(target, "w", encoding="utf-8") as fh:
+            json.dump(payload, fh, ensure_ascii=False, indent=2)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Full report.
+    report_path = output_dir / f"{domain}_evaluation_report.json"
+    _write_json(report_path, generate_report(results, aggregated, metadata))
+    logger.info(f"Saved report to {report_path}")
+
+    # Flat summary of the aggregate metrics.
+    summary = {
+        "domain": domain,
+        "total_count": aggregated.total_count,
+        "exact_match": aggregated.exact_match_avg,
+        "f1_score": aggregated.f1_score_avg,
+        "contains_match": aggregated.contains_match_avg,
+        "bleu_score": aggregated.bleu_score_avg,
+        "by_type": aggregated.by_type,
+    }
+    # Judge scores are included only when a judge overall average exists.
+    if aggregated.judge_overall_avg is not None:
+        summary.update(
+            judge_overall=aggregated.judge_overall_avg,
+            judge_accuracy=aggregated.judge_accuracy,
+        )
+
+    summary_path = output_dir / f"{domain}_summary.json"
+    _write_json(summary_path, summary)
+    logger.info(f"Saved summary to {summary_path}")
+
+
+def main():
+ # Command-line entry point: parses arguments, builds an EvalConfig and runs
+ # the evaluation for one domain (--domain) or for all of them (--all-domains).
+ parser = argparse.ArgumentParser(
+ description="vLLM 서버를 사용한 Table QA 평가",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ # 단일 도메인 평가
+ python -m eval.evaluate_vllm --domain public --vllm-url http://localhost:8000/v1
+
+ # 모든 도메인 평가
+ python -m eval.evaluate_vllm --all-domains --vllm-url http://localhost:8000/v1
+
+ # 특정 모델 사용
+ python -m eval.evaluate_vllm --domain business --model Qwen/Qwen2-VL-7B-Instruct
+
+ # LLM-as-Judge 포함
+ python -m eval.evaluate_vllm --domain finance --use-judge --judge-model gpt-4o
+
+ # 특정 QA 타입만 평가
+ python -m eval.evaluate_vllm --domain public --qa-types lookup compare
+
+ # 제한된 샘플로 테스트
+ python -m eval.evaluate_vllm --domain public --limit 10
+ """,
+ )
+
+ # Domain selection (one of --domain / --all-domains must be given; checked below)
+ parser.add_argument(
+ "--domain",
+ choices=list(DOMAIN_DIRS.keys()),
+ help="평가할 도메인",
+ )
+ parser.add_argument(
+ "--all-domains",
+ action="store_true",
+ help="모든 도메인 평가",
+ )
+
+ # vLLM settings
+ parser.add_argument(
+ "--vllm-url",
+ default="http://localhost:8000/v1",
+ help="vLLM 서버 URL (default: http://localhost:8000/v1)",
+ )
+ parser.add_argument(
+ "--model",
+ default="default",
+ help="사용할 모델 이름 (default: vLLM에서 로드된 모델 사용)",
+ )
+ parser.add_argument(
+ "--max-tokens",
+ type=int,
+ default=512,
+ help="최대 생성 토큰 수 (default: 512)",
+ )
+ parser.add_argument(
+ "--temperature",
+ type=float,
+ default=0.0,
+ help="생성 온도 (default: 0.0)",
+ )
+ parser.add_argument(
+ "--max-concurrent",
+ type=int,
+ default=10,
+ help="최대 동시 요청 수 (default: 10)",
+ )
+ parser.add_argument(
+ "--timeout",
+ type=float,
+ default=120.0,
+ help="요청 타임아웃(초) (default: 120.0)",
+ )
+
+ # Judge settings
+ parser.add_argument(
+ "--use-judge",
+ action="store_true",
+ help="LLM-as-Judge 평가 사용",
+ )
+ parser.add_argument(
+ "--judge-provider",
+ default="openai",
+ choices=["openai", "anthropic"],
+ help="Judge 제공자 (default: openai)",
+ )
+ parser.add_argument(
+ "--judge-model",
+ help="Judge 모델 (default: gpt-4o-mini)",
+ )
+ parser.add_argument(
+ "--judge-api-key",
+ help="Judge API 키 (환경변수에서 가져오지 않을 경우)",
+ )
+
+ # Output settings
+ parser.add_argument(
+ "--output-dir",
+ type=Path,
+ default=Path("eval_results"),
+ help="결과 저장 디렉토리 (default: eval_results)",
+ )
+
+ # Filtering options
+ parser.add_argument(
+ "--limit",
+ type=int,
+ help="평가할 최대 샘플 수 (디버깅용)",
+ )
+ parser.add_argument(
+ "--qa-types",
+ nargs="+",
+ help="특정 QA 타입만 평가 (예: lookup compare arithmetic)",
+ )
+
+ args = parser.parse_args()
+
+ # Argument validation
+ if not args.domain and not args.all_domains:
+ parser.error("--domain 또는 --all-domains 중 하나를 지정해야 합니다.")
+
+ # Build the configuration
+ config = EvalConfig(
+ domain=args.domain or "public", # placeholder value when --all-domains is used
+ vllm_url=args.vllm_url,
+ model=args.model,
+ max_tokens=args.max_tokens,
+ temperature=args.temperature,
+ max_concurrent=args.max_concurrent,
+ timeout=args.timeout,
+ use_judge=args.use_judge,
+ judge_provider=args.judge_provider,
+ judge_model=args.judge_model,
+ judge_api_key=args.judge_api_key,
+ output_dir=args.output_dir,
+ limit=args.limit,
+ qa_types=args.qa_types,
+ )
+
+ # Run the evaluation
+ if args.all_domains:
+ all_results = asyncio.run(evaluate_all_domains(config))
+
+ # Save the overall summary
+ if config.output_dir:
+ config.output_dir.mkdir(parents=True, exist_ok=True)
+
+ all_summary = {}
+ for domain, (results, aggregated, metadata) in all_results.items():
+ if results:
+ save_results(results, aggregated, metadata, config.output_dir, domain)
+ all_summary[domain] = {
+ "total_count": aggregated.total_count,
+ "exact_match": aggregated.exact_match_avg,
+ "f1_score": aggregated.f1_score_avg,
+ }
+
+ summary_path = config.output_dir / "all_domains_summary.json"
+ with open(summary_path, "w", encoding="utf-8") as f:
+ json.dump(all_summary, ensure_ascii=False, indent=2, fp=f)
+ logger.info(f"Saved all-domains summary to {summary_path}")
+ else:
+ results, aggregated, metadata = asyncio.run(evaluate_domain(config))
+ print_report(aggregated)
+
+ if config.output_dir and results:
+ save_results(results, aggregated, metadata, config.output_dir, config.domain)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/filter_qa_by_difficulty.py b/filter_qa_by_difficulty.py
new file mode 100755
index 0000000..1469baa
--- /dev/null
+++ b/filter_qa_by_difficulty.py
@@ -0,0 +1,595 @@
+#!/usr/bin/env python3
+"""
+vLLM 서버를 사용하여 QA 난이도를 측정하고 필터링하는 스크립트.
+
+모델이 너무 쉽게 맞추는 문제(10/10)는 제외하고,
+적당한 난이도(3-6/10 정확도)의 QA만 검수 대상으로 추출합니다.
+
+Usage:
+ # 기본 사용 (business 도메인)
+ python filter_qa_by_difficulty.py --domain business
+
+ # 여러 도메인
+ python filter_qa_by_difficulty.py --all
+
+ # 커스텀 설정
+ python filter_qa_by_difficulty.py --domain business --trials 10 --min-acc 0.3 --max-acc 0.6
+
+ # vLLM 서버 URL 지정
+ python filter_qa_by_difficulty.py --domain business --vllm-url http://localhost:8000/v1
+"""
+
+import argparse
+import base64
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+
+from eval.metrics import normalize_answer, exact_match, f1_score
+
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+DOMAIN_DIRS = {
+ "academic": "output_academic",
+ "business": "output_business",
+ "finance": "output_finance",
+ "medical": "output_medical",
+ "public": "output_public",
+}
+
+
+@dataclass
+class FilterConfig:
+ """Settings for measuring QA difficulty and filtering by accuracy."""
+ vllm_url: str = "http://localhost:8000/v1"
+ model_name: str = "" # empty: auto-detected from the vLLM server
+ trials: int = 10 # number of attempts per QA
+ min_accuracy: float = 0.3 # lower accuracy bound (inclusive)
+ max_accuracy: float = 0.6 # upper accuracy bound (inclusive)
+ temperature: float = 0.7 # non-zero to get varied responses
+ max_tokens: int = 512
+ timeout: int = 60
+ max_workers: int = 4 # parallel processing; NOTE(review): not read by the code visible in this chunk
+
+
+@dataclass
+class QADifficultyResult:
+    """Outcome of repeatedly asking the model one QA pair."""
+    pair_id: str
+    table_index: int
+    qa_index: int
+    question: str
+    answer: str
+    qa_type: str
+    correct_count: int
+    total_trials: int
+    accuracy: float
+    responses: List[str] = field(default_factory=list)
+    difficulty_category: str = ""  # too_easy, easy, medium, hard or very_hard
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-friendly dict; only the first three responses are kept."""
+        exported = (
+            "pair_id",
+            "table_index",
+            "qa_index",
+            "question",
+            "answer",
+            "qa_type",
+            "correct_count",
+            "total_trials",
+            "accuracy",
+            "difficulty_category",
+        )
+        payload: Dict[str, Any] = {name: getattr(self, name) for name in exported}
+        # Store a sample only, to keep the output file small.
+        payload["sample_responses"] = self.responses[:3]
+        return payload
+
+
+def get_vllm_model_name(vllm_url: str) -> str:
+    """Return the id of the first model listed by ``<vllm_url>/models``.
+
+    Any failure is logged as a warning. In that case, or when the server
+    lists no models, the string ``"default"`` is returned.
+    """
+    import requests
+
+    fallback = "default"
+    try:
+        reply = requests.get(f"{vllm_url}/models", timeout=10)
+        reply.raise_for_status()
+        available = reply.json().get("data", [])
+        if not available:
+            return fallback
+        return available[0]["id"]
+    except Exception as e:
+        logger.warning(f"Failed to get model name from vLLM: {e}")
+        return fallback
+
+
+def find_table_images(output_dir: Path, pair_id: str) -> List[Path]:
+    """Return the sorted ``<pair_id>_table_*.png`` files in ``output_dir/images``.
+
+    An empty list is returned when the images directory does not exist.
+    """
+    images_dir = output_dir / "images"
+    if images_dir.exists():
+        return sorted(images_dir.glob(f"{pair_id}_table_*.png"))
+    return []
+
+
+def encode_image_base64(image_path: Path) -> str:
+    """Read the file at ``image_path`` and return its bytes as base64 text."""
+    raw_bytes = Path(image_path).read_bytes()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")
+
+
+def run_single_inference(
+    vllm_url: str,
+    model_name: str,
+    image_base64: str,
+    question: str,
+    config: FilterConfig,
+) -> Optional[str]:
+    """Send one image + question chat request and return the stripped reply.
+
+    The request goes to ``<vllm_url>/chat/completions`` with the image
+    embedded as a base64 PNG data URL. Any failure is logged at debug level
+    and reported as ``None``.
+    """
+    import requests
+
+    image_part = {
+        "type": "image_url",
+        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+    }
+    text_part = {
+        "type": "text",
+        "text": f"Based on the table image, answer the following question concisely.\n\nQuestion: {question}\n\nAnswer:",
+    }
+    conversation = [{"role": "user", "content": [image_part, text_part]}]
+
+    try:
+        # The payload is built inside the try block so a problem reading the
+        # config values is handled like any other failure.
+        payload = {
+            "model": model_name,
+            "messages": conversation,
+            "max_tokens": config.max_tokens,
+            "temperature": config.temperature,
+        }
+        reply = requests.post(
+            f"{vllm_url}/chat/completions",
+            json=payload,
+            timeout=config.timeout,
+        )
+        reply.raise_for_status()
+        body = reply.json()
+        return body["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        logger.debug(f"Inference failed: {e}")
+        return None
+
+
+def check_answer_correct(prediction: str, ground_truth: str) -> bool:
+    """Decide whether a model answer counts as correct.
+
+    A prediction is accepted when any of these holds:
+
+    * it is an exact match according to ``exact_match``;
+    * its ``f1_score`` against the ground truth is above 0.8;
+    * after ``normalize_answer``, one answer contains the other.
+
+    An empty prediction is never correct. The containment rule is applied
+    only when both normalized answers are non-empty.
+    """
+    if not prediction:
+        return False
+
+    # Exact match
+    if exact_match(prediction, ground_truth):
+        return True
+
+    # F1 score > 0.8
+    if f1_score(prediction, ground_truth) > 0.8:
+        return True
+
+    # Containment between the normalized answers.
+    norm_pred = normalize_answer(prediction)
+    norm_gt = normalize_answer(ground_truth)
+
+    # An empty string is a substring of every string, so an answer that
+    # normalizes to "" would otherwise match anything.
+    # assumes normalize_answer returns a str — TODO confirm
+    if not norm_pred or not norm_gt:
+        return False
+
+    return norm_gt in norm_pred or norm_pred in norm_gt
+
+
+def measure_qa_difficulty(
+    vllm_url: str,
+    model_name: str,
+    image_base64: str,
+    question: str,
+    answer: str,
+    config: FilterConfig,
+) -> Tuple[int, List[str]]:
+    """Ask the same question ``config.trials`` times and count correct replies.
+
+    Returns:
+        ``(correct_count, responses)``, where ``responses`` holds only the
+        non-empty replies. Failed or empty attempts are not recorded and
+        never count as correct.
+    """
+    collected: List[str] = []
+    hits = 0
+
+    for _ in range(config.trials):
+        reply = run_single_inference(
+            vllm_url, model_name, image_base64, question, config
+        )
+        if not reply:
+            continue
+        collected.append(reply)
+        if check_answer_correct(reply, answer):
+            hits += 1
+
+    return hits, collected
+
+
+def categorize_difficulty(accuracy: float) -> str:
+    """Map an accuracy in [0, 1] to a difficulty label.
+
+    ``>= 0.9`` is "too_easy", ``>= 0.7`` is "easy", ``>= 0.3`` is "medium"
+    (the target range), any other positive value is "hard", and everything
+    else is "very_hard".
+    """
+    # Lower bounds, checked from the highest band down.
+    bands = (
+        (0.9, "too_easy"),
+        (0.7, "easy"),
+        (0.3, "medium"),
+    )
+    for lower_bound, label in bands:
+        if accuracy >= lower_bound:
+            return label
+    return "hard" if accuracy > 0 else "very_hard"
+
+
+def filter_qa_for_domain(
+ domain: str,
+ config: FilterConfig,
+ limit: Optional[int] = None,
+ dry_run: bool = False,
+) -> Dict[str, Any]:
+ """Measure the difficulty of every QA pair of a domain and filter by accuracy.
+
+ Reads pipeline_output.json of the domain's output directory, asks the model
+ each question config.trials times using the first table image of the pair,
+ and writes two timestamped JSON files into the output directory: the full
+ analysis and the list of QA pairs whose accuracy lies within
+ [config.min_accuracy, config.max_accuracy].
+
+ Args:
+ domain: Key of DOMAIN_DIRS.
+ config: Filtering settings.
+ limit: When set, only the first `limit` entries of the JSON file are used.
+ dry_run: When True, only counts entries, QA pairs and images and returns.
+
+ Returns:
+ A summary dict; its keys differ between dry-run and real runs.
+
+ Raises:
+ ValueError: Unknown domain.
+ FileNotFoundError: pipeline_output.json is missing, or the images
+ directory is missing on a real run.
+ """
+ domain_dir = DOMAIN_DIRS.get(domain)
+ if not domain_dir:
+ raise ValueError(f"Unknown domain: {domain}")
+
+ output_dir = project_root / domain_dir
+ pipeline_output_path = output_dir / "pipeline_output.json"
+
+ if not pipeline_output_path.exists():
+ raise FileNotFoundError(f"pipeline_output.json not found: {pipeline_output_path}")
+
+ # Images directory
+ images_dir = output_dir / "images"
+
+ # Load the data
+ with open(pipeline_output_path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ logger.info(f"Loaded {len(data)} entries from {pipeline_output_path}")
+
+ if limit:
+ data = data[:limit]
+ logger.info(f"Limited to {limit} entries")
+
+ if dry_run:
+ # Only count the QA pairs (works even when no images exist)
+ total_qa = sum(len(entry.get("qa_results", [])) for entry in data)
+ images_exist = images_dir.exists()
+ image_count = len(list(images_dir.glob("*.png"))) if images_exist else 0
+ logger.info(f"Dry run: {len(data)} entries, {total_qa} QA pairs")
+ logger.info(f"Images directory: {'exists' if images_exist else 'NOT FOUND'} ({image_count} images)")
+ if not images_exist:
+ # NOTE(review): the message names capture_html_to_images.py — confirm the script name
+ logger.warning("이미지 디렉토리가 없습니다. 먼저 capture_html_to_images.py를 실행하세요.")
+ return {
+ "domain": domain,
+ "entries": len(data),
+ "total_qa": total_qa,
+ "images_exist": images_exist,
+ "image_count": image_count,
+ "dry_run": True,
+ }
+
+ # A real run requires the images directory
+ if not images_dir.exists():
+ raise FileNotFoundError(
+ f"Images directory not found: {images_dir}\n"
+ "먼저 capture_html_to_images.py를 실행하여 HTML을 이미지로 변환하세요."
+ )
+
+ # Resolve the vLLM model name (configured value wins, otherwise ask the server)
+ model_name = config.model_name or get_vllm_model_name(config.vllm_url)
+ logger.info(f"Using model: {model_name}")
+
+ # Collect results
+ all_results: List[QADifficultyResult] = []
+ stats = {
+ "total_qa": 0,
+ "too_easy": 0,
+ "easy": 0,
+ "medium": 0,
+ "hard": 0,
+ "very_hard": 0,
+ "skipped": 0,
+ }
+
+ for entry_idx, entry in enumerate(data):
+ pair_id = entry.get("pair_id", entry.get("name", f"entry_{entry_idx}"))
+ qa_results = entry.get("qa_results", [])
+
+ if not qa_results:
+ continue
+
+ # Find the images of this pair
+ image_files = find_table_images(output_dir, pair_id)
+ if not image_files:
+ logger.warning(f"No images found for {pair_id}, skipping")
+ stats["skipped"] += len(qa_results)
+ continue
+
+ # Use the first image only (TODO: multi-image support)
+ image_base64 = encode_image_base64(image_files[0])
+
+ logger.info(f"[{entry_idx + 1}/{len(data)}] Processing {pair_id} ({len(qa_results)} QAs)")
+
+ for qa_idx, qa in enumerate(qa_results):
+ question = qa.get("question", "")
+ answer = qa.get("answer", "")
+ qa_type = qa.get("type", "unknown")
+
+ if not question or not answer:
+ stats["skipped"] += 1
+ continue
+
+ stats["total_qa"] += 1
+
+ # Measure the difficulty
+ correct_count, responses = measure_qa_difficulty(
+ config.vllm_url,
+ model_name,
+ image_base64,
+ question,
+ answer,
+ config,
+ )
+
+ # The denominator is the configured number of trials, not len(responses)
+ accuracy = correct_count / config.trials if config.trials > 0 else 0
+ difficulty = categorize_difficulty(accuracy)
+ stats[difficulty] += 1
+
+ result = QADifficultyResult(
+ pair_id=pair_id,
+ table_index=0,
+ qa_index=qa_idx,
+ question=question,
+ answer=answer,
+ qa_type=qa_type,
+ correct_count=correct_count,
+ total_trials=config.trials,
+ accuracy=accuracy,
+ responses=responses,
+ difficulty_category=difficulty,
+ )
+ all_results.append(result)
+
+ # Progress log
+ status = "✓" if config.min_accuracy <= accuracy <= config.max_accuracy else "✗"
+ logger.info(f" [{qa_idx + 1}/{len(qa_results)}] {qa_type}: {correct_count}/{config.trials} ({accuracy:.0%}) [{difficulty}] {status}")
+
+ # Filter (target difficulty range, both bounds inclusive)
+ filtered_results = [
+ r for r in all_results
+ if config.min_accuracy <= r.accuracy <= config.max_accuracy
+ ]
+
+ # Save the results
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ output_file = output_dir / f"qa_difficulty_analysis_{timestamp}.json"
+
+ output_data = {
+ "domain": domain,
+ "config": {
+ "vllm_url": config.vllm_url,
+ "model_name": model_name,
+ "trials": config.trials,
+ "min_accuracy": config.min_accuracy,
+ "max_accuracy": config.max_accuracy,
+ "temperature": config.temperature,
+ },
+ "stats": stats,
+ "filtered_count": len(filtered_results),
+ "all_results": [r.to_dict() for r in all_results],
+ "filtered_for_review": [r.to_dict() for r in filtered_results],
+ "timestamp": timestamp,
+ }
+
+ with open(output_file, "w", encoding="utf-8") as f:
+ json.dump(output_data, f, ensure_ascii=False, indent=2)
+
+ logger.info(f"Results saved to {output_file}")
+
+ # Save a compact list for manual review
+ review_file = output_dir / f"qa_for_review_{timestamp}.json"
+ review_data = {
+ "domain": domain,
+ "description": f"QA pairs with accuracy between {config.min_accuracy:.0%} and {config.max_accuracy:.0%}",
+ "count": len(filtered_results),
+ "items": [
+ {
+ "pair_id": r.pair_id,
+ "qa_type": r.qa_type,
+ "question": r.question,
+ "answer": r.answer,
+ "accuracy": f"{r.accuracy:.0%} ({r.correct_count}/{r.total_trials})",
+ "sample_model_responses": r.responses[:3],
+ }
+ for r in filtered_results
+ ],
+ }
+
+ with open(review_file, "w", encoding="utf-8") as f:
+ json.dump(review_data, f, ensure_ascii=False, indent=2)
+
+ logger.info(f"Review list saved to {review_file}")
+
+ return {
+ "domain": domain,
+ "total_qa": stats["total_qa"],
+ "stats": stats,
+ "filtered_for_review": len(filtered_results),
+ "output_file": str(output_file),
+ "review_file": str(review_file),
+ }
+
+
+def main():
+    """Run the QA difficulty filtering CLI.
+
+    Parses the command-line options, builds a ``FilterConfig`` from them, runs
+    ``filter_qa_for_domain`` once per selected domain and prints a summary to
+    stdout.
+
+    ``--all`` takes precedence over ``--domain``; if neither is given the
+    parser exits with a usage error. Invalid ``--trials`` / accuracy bounds are
+    rejected up front, before any inference is started. An exception raised
+    while processing one domain is logged and recorded in the summary; the
+    remaining domains are still processed.
+    """
+    parser = argparse.ArgumentParser(
+        description="vLLM을 사용하여 QA 난이도를 측정하고 검수 대상을 필터링합니다.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+ # business 도메인 필터링
+ python filter_qa_by_difficulty.py --domain business
+
+ # 모든 도메인
+ python filter_qa_by_difficulty.py --all
+
+ # 커스텀 설정 (5회 시도, 20-50% 정확도)
+ python filter_qa_by_difficulty.py --domain business --trials 5 --min-acc 0.2 --max-acc 0.5
+
+ # vLLM 서버 지정
+ python filter_qa_by_difficulty.py --domain business --vllm-url http://localhost:8000/v1
+
+ # 테스트 (3개 entry만)
+ python filter_qa_by_difficulty.py --domain business --limit 3
+
+Difficulty Categories:
+ - too_easy: 90-100% accuracy (제외)
+ - easy: 70-89% accuracy
+ - medium: 30-69% accuracy (검수 대상)
+ - hard: 1-29% accuracy
+ - very_hard: 0% accuracy
+ """
+    )
+
+    parser.add_argument(
+        "--domain",
+        nargs="+",
+        choices=list(DOMAIN_DIRS.keys()),
+        help="필터링할 도메인(들)",
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="모든 도메인 필터링",
+    )
+    parser.add_argument(
+        "--vllm-url",
+        default="http://localhost:8000/v1",
+        help="vLLM 서버 URL (default: http://localhost:8000/v1)",
+    )
+    parser.add_argument(
+        "--model",
+        default="",
+        help="모델 이름 (미지정시 vLLM에서 자동 감지)",
+    )
+    parser.add_argument(
+        "--trials",
+        type=int,
+        default=10,
+        help="각 QA당 시도 횟수 (default: 10)",
+    )
+    parser.add_argument(
+        "--min-acc",
+        type=float,
+        default=0.3,
+        help="최소 정확도 (default: 0.3)",
+    )
+    parser.add_argument(
+        "--max-acc",
+        type=float,
+        default=0.6,
+        help="최대 정확도 (default: 0.6)",
+    )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=0.7,
+        help="샘플링 temperature (default: 0.7)",
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        help="처리할 최대 entry 수 (테스트용)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="실제 추론 없이 확인만",
+    )
+
+    args = parser.parse_args()
+
+    # Fail fast on settings that cannot yield a meaningful measurement, instead
+    # of silently producing an empty or all-zero result after a long run.
+    if args.trials < 1:
+        parser.error("--trials는 1 이상이어야 합니다.")
+    if not 0.0 <= args.min_acc <= args.max_acc <= 1.0:
+        parser.error("--min-acc와 --max-acc는 0 <= min-acc <= max-acc <= 1 을 만족해야 합니다.")
+
+    # Decide which domains to process; --all wins over --domain.
+    if args.all:
+        domains = list(DOMAIN_DIRS.keys())
+    elif args.domain:
+        domains = args.domain
+    else:
+        # parser.error() prints the usage message and exits the process.
+        parser.error("--domain 또는 --all을 지정해야 합니다.")
+
+    # Build the filtering configuration from the CLI options.
+    config = FilterConfig(
+        vllm_url=args.vllm_url,
+        model_name=args.model,
+        trials=args.trials,
+        min_accuracy=args.min_acc,
+        max_accuracy=args.max_acc,
+        temperature=args.temperature,
+    )
+
+    logger.info(f"Domains: {domains}")
+    logger.info(f"Config: trials={config.trials}, accuracy range={config.min_accuracy:.0%}-{config.max_accuracy:.0%}")
+
+    # Process each domain independently so one failure does not abort the rest.
+    results = []
+    for domain in domains:
+        logger.info(f"\n{'='*60}")
+        logger.info(f"Processing domain: {domain}")
+        logger.info(f"{'='*60}")
+
+        try:
+            result = filter_qa_for_domain(
+                domain=domain,
+                config=config,
+                limit=args.limit,
+                dry_run=args.dry_run,
+            )
+            results.append(result)
+        except Exception as e:
+            # logger.exception keeps the traceback, which the plain error
+            # message alone would lose.
+            logger.exception(f"Failed to process {domain}: {e}")
+            results.append({"domain": domain, "error": str(e)})
+
+    # Summary
+    print("\n" + "=" * 60)
+    print(" QA Difficulty Filtering Summary")
+    print("=" * 60)
+    for result in results:
+        domain = result.get("domain", "unknown")
+        if "error" in result:
+            print(f" {domain}: ERROR - {result['error']}")
+        elif result.get("dry_run"):
+            # The hint names the capture script (capture_html_images.py).
+            img_status = "✓" if result.get("images_exist") else "✗ (run capture_html_images.py first)"
+            print(f" {domain}: {result.get('total_qa', 0)} QA pairs, {result.get('image_count', 0)} images {img_status} (dry run)")
+        else:
+            stats = result.get("stats", {})
+            filtered = result.get("filtered_for_review", 0)
+            total = result.get("total_qa", 0)
+            print(f" {domain}:")
+            print(f" Total QA: {total}")
+            print(f" too_easy: {stats.get('too_easy', 0)}, easy: {stats.get('easy', 0)}")
+            print(f" medium: {stats.get('medium', 0)}, hard: {stats.get('hard', 0)}, very_hard: {stats.get('very_hard', 0)}")
+            # The percentage is undefined for an empty domain; skip the line
+            # rather than printing a stray blank one.
+            if total > 0:
+                print(f" → For review: {filtered} ({filtered/total*100:.1f}% of total)")
+    print("=" * 60)
+
+
+# Script entry point: run the CLI only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+ main()
diff --git a/generate_synthetic_table/flow.py b/generate_synthetic_table/flow.py
index 8be68dd..ce2d976 100644
--- a/generate_synthetic_table/flow.py
+++ b/generate_synthetic_table/flow.py
@@ -49,6 +49,7 @@ class TableState(TypedDict, total=False):
synthetic_json: dict # 파싱된 합성 데이터 JSON
qa_results: List[Dict] # 생성된 QA 쌍
token_usage: int # QA 생성에 사용된 총 토큰 수
+ is_multi_image: bool # 다중 이미지 입력 여부 (cross-image QA 생성됨)
def _encode_image(image_path: Path) -> str:
@@ -684,12 +685,65 @@ def _node(state: TableState) -> TableState:
return _node
+def generate_long_sequence_node(llm: ChatOpenAI) -> Callable[[TableState], TableState]:
+    """Build the node that appends ``long_sequence`` (context-dependent) QA pairs.
+
+    The returned node formats the domain's ``generate_long_sequence`` prompt
+    with the synthetic table HTML, calls the LLM once, and appends the
+    returned ``qa_pairs`` to ``state["qa_results"]``.
+
+    The step is best-effort: the input state is returned unchanged when the
+    state already carries errors, the prompt is not available, there is no
+    synthetic table, or the prompt template cannot be formatted. Once the LLM
+    has been called, the tokens it used are always added to
+    ``state["token_usage"]``, even if the response turns out to be unusable.
+
+    Args:
+        llm: Chat model handed to ``_call_llm``.
+
+    Returns:
+        A function mapping a ``TableState`` to the updated ``TableState``.
+    """
+
+    def _node(state: TableState) -> TableState:
+        logger.info("Entering node: generate_long_sequence")
+
+        # Short-circuit on upstream errors before doing any work.
+        if state.get("errors"):
+            return state
+
+        # The prompt is optional: a ValueError from _load_prompt is treated as
+        # "not available" and the step is skipped.
+        try:
+            prompt_template = _load_prompt("generate_long_sequence", state.get("domain"))
+        except ValueError:
+            logger.info("No generate_long_sequence prompt found, skipping long_sequence generation")
+            return state
+
+        synthetic_html = state.get("synthetic_table")
+        if not synthetic_html:
+            logger.warning("No synthetic table for long_sequence generation, skipping")
+            return state
+
+        try:
+            prompt = prompt_template.format(synthetic_html=synthetic_html)
+        except KeyError as e:
+            logger.warning(f"long_sequence prompt missing placeholder: {e}, skipping")
+            return state
+        except (IndexError, ValueError) as e:
+            # str.format raises IndexError for a positional "{}" field and
+            # ValueError for an unbalanced brace in the template.
+            logger.warning(f"long_sequence prompt is malformed: {e}, skipping")
+            return state
+
+        response_text, token_usage = _call_llm(llm, prompt, return_token_usage=True)
+
+        logger.info(f"Long sequence generation token usage: {token_usage}")
+
+        # The call consumed tokens whether or not its output is usable, so the
+        # running total is updated on every path below. "or 0" tolerates a
+        # missing/None counter on either side.
+        total_token_usage = (state.get("token_usage") or 0) + (token_usage or 0)
+
+        response_json = robust_json_parse(response_text)
+        long_seq_qa = response_json.get("qa_pairs") if isinstance(response_json, dict) else None
+
+        # Only a list of QA pairs can be appended safely; anything else
+        # (missing key, null, a bare object) counts as an invalid response.
+        if not isinstance(long_seq_qa, list):
+            logger.warning("long_sequence generation did not return valid JSON or 'qa_pairs' key.")
+            return {**state, "token_usage": total_token_usage}
+
+        # Copy before extending so the list held by the incoming state is not mutated.
+        existing_qa = list(state.get("qa_results") or [])
+        existing_qa.extend(long_seq_qa)
+        logger.info(f"Added {len(long_seq_qa)} long_sequence QA pairs. Total QA: {len(existing_qa)}")
+        return {**state, "qa_results": existing_qa, "token_usage": total_token_usage}
+
+    return _node
+
+
def generate_qa_from_image_node(llm: ChatOpenAI) -> Callable[[TableState], TableState]:
- """Generate QA pairs directly from image (QA-only mode)."""
+ """Generate QA pairs directly from image (QA-only mode).
+
+ If multiple images are provided, uses 'generate_qa_from_multi_image' prompt
+ to generate cross-image QA pairs that require understanding multiple tables.
+ """
def _node(state: TableState) -> TableState:
logger.info("Entering node: generate_qa_from_image")
- prompt_template = _load_prompt("generate_qa_from_image", state.get("domain"))
if state.get("errors"):
return state
@@ -711,6 +765,18 @@ def _node(state: TableState) -> TableState:
else:
logger.warning(f"Skipping missing image in batch: {img_p}")
+ # Use multi-image prompt if there are multiple images
+ is_multi_image = len(image_data_urls) > 1
+ if is_multi_image:
+ logger.info(f"Multi-image mode detected: {len(image_data_urls)} images. Using cross-image QA prompt.")
+ try:
+ prompt_template = _load_prompt("generate_qa_from_multi_image", state.get("domain"))
+ except ValueError:
+ logger.warning("Multi-image prompt not found, falling back to single-image prompt")
+ prompt_template = _load_prompt("generate_qa_from_image", state.get("domain"))
+ else:
+ prompt_template = _load_prompt("generate_qa_from_image", state.get("domain"))
+
prompt = prompt_template
response_text, token_usage = _call_llm(llm, prompt, image_urls=image_data_urls, return_token_usage=True)
@@ -727,7 +793,10 @@ def _node(state: TableState) -> TableState:
logger.warning("QA generation from image did not return valid JSON or 'qa_pairs' key.")
logger.info(f"Returning token_usage: {token_usage}")
- return {**state, "qa_results": qa_results, "token_usage": token_usage}
+ result_state = {**state, "qa_results": qa_results, "token_usage": token_usage}
+ if is_multi_image:
+ result_state["is_multi_image"] = True
+ return result_state
return _node
@@ -800,6 +869,7 @@ def build_synthetic_table_graph(
if not skip_qa:
graph.add_node("generate_qa", generate_qa_node(llm))
+ graph.add_node("generate_long_sequence", generate_long_sequence_node(llm))
# Routing based on provider and input type
def route_start(state: TableState) -> str:
@@ -852,7 +922,8 @@ def route_start(state: TableState) -> str:
graph.add_edge("parse_synthetic_table", END)
else:
graph.add_edge("parse_synthetic_table", "generate_qa")
- graph.add_edge("generate_qa", END)
+ graph.add_edge("generate_qa", "generate_long_sequence")
+ graph.add_edge("generate_long_sequence", END)
return graph
diff --git a/generate_synthetic_table/prompts/academic.yaml b/generate_synthetic_table/prompts/academic.yaml
index 87543eb..ea2caed 100644
--- a/generate_synthetic_table/prompts/academic.yaml
+++ b/generate_synthetic_table/prompts/academic.yaml
@@ -6,7 +6,7 @@ generate_qa: |
{synthetic_html}
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain an academic tone and accurately handle experimental results, performance metrics (Accuracy, F1-score, etc.), model names, and statistical significance.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -14,6 +14,7 @@ generate_qa: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real model/dataset names** (e.g., BERT, GPT, ResNet, ImageNet). Use fictional names like "Model-A", "Dataset-X", "Method-알파".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -21,16 +22,15 @@ generate_qa: |
- Is the question clear and unambiguous? (e.g., "Best model" -> "Model with highest Accuracy")
### [Reasoning Type Definitions (Academic Domain)]
- (1) lookup: Retrieve specific model performance or value without condition/calculation. (e.g., "What is the ImageNet Top-1 Accuracy of ResNet-50?")
+ (1) lookup: Retrieve specific model performance or value without condition/calculation. (e.g., "What is the Top-1 Accuracy of Model-A?")
(2) filter: Select rows/columns meeting specific conditions (performance, params, etc.). (e.g., "List all models with parameters under 10M.")
- (3) aggregate: Statistical aggregation of experimental results (Sum, Avg, Max, Min, Count). (e.g., "What is the average F1-score of all BERT variants?")
- (4) compare: Compare performance against baseline or between models. (e.g., "Does the Proposed Method have a higher BLEU score than SOTA?")
+ (3) aggregate: Statistical aggregation of experimental results (Sum, Avg, Max, Min, Count). (e.g., "What is the average F1-score of all model variants?")
+ (4) compare: Compare performance against baseline or between models. (e.g., "Does the Proposed Method have a higher BLEU score than the baseline?")
(5) arithmetic: Specific calculation beyond simple comparison (difference, growth rate). (e.g., "What is the percentage improvement of Large model over Base model?")
(6) temporal: Deduce trends over years or epochs. (e.g., "Which model published after 2020 has the best performance?")
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "What is the Precision of the model with the highest Recall?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the best performing model?" -> implies Bolded value or Accuracy column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "How much did performance drop in 'w/o attention'?" -> implies comparison to Full Model)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Experimental Setup' or 'Hypothesis' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -42,7 +42,7 @@ generate_qa: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
]
}}
@@ -51,7 +51,7 @@ generate_qa_from_image: |
Your mission is to analyze the provided academic table image and generate Question-Answer (QA) pairs that fit the specified Reasoning Type definitions.
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain an academic tone and accurately handle experimental results, performance metrics (Accuracy, F1-score, etc.), model names, and statistical significance.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -59,6 +59,7 @@ generate_qa_from_image: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real model/dataset names** (e.g., BERT, GPT, ResNet, ImageNet). Use fictional names like "Model-A", "Dataset-X", "Method-알파".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -66,16 +67,15 @@ generate_qa_from_image: |
- Is the question clear and unambiguous? (e.g., "Best model" -> "Model with highest Accuracy")
### [Reasoning Type Definitions (Academic Domain)]
- (1) lookup: Retrieve specific model performance or value without condition/calculation. (e.g., "What is the ImageNet Top-1 Accuracy of ResNet-50?")
+ (1) lookup: Retrieve specific model performance or value without condition/calculation. (e.g., "What is the Top-1 Accuracy of Model-A?")
(2) filter: Select rows/columns meeting specific conditions (performance, params, etc.). (e.g., "List all models with parameters under 10M.")
- (3) aggregate: Statistical aggregation of experimental results (Sum, Avg, Max, Min, Count). (e.g., "What is the average F1-score of all BERT variants?")
- (4) compare: Compare performance against baseline or between models. (e.g., "Does the Proposed Method have a higher BLEU score than SOTA?")
+ (3) aggregate: Statistical aggregation of experimental results (Sum, Avg, Max, Min, Count). (e.g., "What is the average F1-score of all model variants?")
+ (4) compare: Compare performance against baseline or between models. (e.g., "Does the Proposed Method have a higher BLEU score than the baseline?")
(5) arithmetic: Specific calculation beyond simple comparison (difference, growth rate). (e.g., "What is the percentage improvement of Large model over Base model?")
(6) temporal: Deduce trends over years or epochs. (e.g., "Which model published after 2020 has the best performance?")
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "What is the Precision of the model with the highest Recall?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the best performing model?" -> implies Bolded value or Accuracy column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "How much did performance drop in 'w/o attention'?" -> implies comparison to Full Model)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Experimental Setup' or 'Hypothesis' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -87,7 +87,59 @@ generate_qa_from_image: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
+ ]
+ }}
+ Return ONLY the JSON object.
+
+generate_qa_from_multi_image: |
+ You are an 'AI Data Researcher' specialized in building high-quality QA datasets that require understanding MULTIPLE academic/scientific table images together.
+ Your mission is to analyze ALL provided academic table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple experimental results, model comparisons, or benchmark tables.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate (e.g., different datasets, different model ablations, training vs test results).
+ 2. **Generate Cross-Image QA**: Create 9 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Each QA pair must correspond to exactly one Reasoning Type.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real model/dataset names** (e.g., BERT, GPT, ImageNet). Use fictional names like "Model-A", "Dataset-X", "Method-알파".
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what experimental data is being compared or combined?
+
+ ### [Cross-Image Reasoning Type Definitions (Academic Domain)]
+ (1) cross_lookup: Retrieve and combine performance values from different result tables. (e.g., "What is Model-A's accuracy on both Dataset-X and Dataset-Y from the two tables?")
+ (2) cross_filter: Filter models across benchmark tables based on conditions. (e.g., "Which models achieve >90% accuracy on both datasets shown in the two images?")
+ (3) cross_aggregate: Aggregate experimental results spanning multiple benchmarks. (e.g., "What is the average F1-score of Method-가 across all evaluation tables?")
+ (4) cross_compare: Compare model performance between different experimental settings. (e.g., "Does the proposed method outperform the baseline on both in-domain and out-of-domain tests?")
+ (5) cross_arithmetic: Calculate performance differences using data from multiple tables. (e.g., "What is the accuracy improvement of Model-A from the ablation table to the full model table?")
+ (6) cross_temporal: Identify experimental trends by combining multiple result tables. (e.g., "Based on the per-epoch results in both tables, which model converges faster?")
+ (7) cross_multi_hop: Multi-step academic inference across tables. (e.g., "Find the best model on Dataset-X in Image 1, then find its parameters in Image 2.")
+ (8) cross_implicit: Answer questions requiring implicit understanding of relationships between results. (e.g., "Which approach is most efficient?" requires combining accuracy and parameter count from multiple tables)
+ (9) cross_synthesis: Synthesize research insights only possible by viewing all tables together. (e.g., "Based on both the main results and ablation study, which component contributes most to performance?")
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple academic images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "context": null,
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (One per Reasoning Type => Total 9)
]
}}
Return ONLY the JSON object.
@@ -111,8 +163,9 @@ generate_synthetic_table: |
3. **⚠️ Data Transformation - ABSOLUTELY MANDATORY ⚠️:**
- **ALL data cell values MUST be replaced with completely new synthetic values.**
- **NEVER copy any original data values** - generate fresh, realistic alternatives.
- - For student/model names: Generate DIFFERENT names
- - For university names: Generate DIFFERENT names
+ - **NEVER use real model/dataset/university names** (BERT, GPT, ResNet, ImageNet, MIT, Stanford, etc.). Use fictional names like "Model-A", "Dataset-X", "University-가".
+ - For student/model names: Generate DIFFERENT fictional names
+ - For university names: Generate DIFFERENT fictional names
- For grades/scores: Generate DIFFERENT realistic values
- For course/research topics: Generate DIFFERENT titles
- For dates: Generate DIFFERENT plausible dates
@@ -151,7 +204,8 @@ generate_synthetic_table_from_image: |
3. **⚠️ Data Generation - ABSOLUTELY CRITICAL ⚠️:**
- **NEVER copy the data values from the image** - this is NOT an OCR task
- **ALL cell content must be completely NEW and DIFFERENT**
- - For student/model names: Generate DIFFERENT names
+ - **NEVER use real model/dataset/university names** (BERT, GPT, ResNet, ImageNet, MIT, Stanford, etc.). Use fictional names like "Model-A", "Dataset-X", "University-가".
+ - For student/model names: Generate DIFFERENT fictional names
- For grades/scores: Generate DIFFERENT values
- For course/research topics: Generate DIFFERENT titles
4. **Styling:** Use **Tailwind CSS** classes exclusively (NO inline styles).
@@ -170,3 +224,40 @@ generate_synthetic_table_from_image: |
- Score in image: "점수A" → Generate: "점수B"
⚠️ If the generated content is identical or very similar to the image, the output is INVALID.
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for academic/scientific tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic academic context** (e.g., "Experimental Setup", "Research Hypothesis", "Ablation Study Goals") that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real model/dataset names** (e.g., BERT, GPT, ResNet). Use fictional names.
+
+ ### [Example Scenarios (Academic)]
+ - Context describes experimental conditions (dataset size, hardware) → Question asks which models meet the criteria
+ - Context outlines baseline comparison requirements → Question asks which methods show improvement
+ - Context specifies evaluation metrics of interest → Question asks for rankings based on those metrics
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "실험 설정에 따르면... (2-4 sentences of academic context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/generate_synthetic_table/prompts/business.yaml b/generate_synthetic_table/prompts/business.yaml
index 18ebc27..40594c5 100644
--- a/generate_synthetic_table/prompts/business.yaml
+++ b/generate_synthetic_table/prompts/business.yaml
@@ -6,7 +6,7 @@ generate_qa: |
{synthetic_html}
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a business tone and accurately handle revenue, profit margins, growth rates, market share, and employee performance metrics.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -14,6 +14,7 @@ generate_qa: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google). Use fictional names like "A사", "B기업", "가나다 주식회사".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -30,7 +31,6 @@ generate_qa: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "What is the name of the Branch Manager of the branch with the #1 Revenue?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the most profitable project?" -> implies Profit Margin column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is Q4 performance?" -> implies continuation from Q1-Q3 context)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Management Goals' or 'Market Conditions' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -42,7 +42,7 @@ generate_qa: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
]
}}
@@ -51,7 +51,7 @@ generate_qa_from_image: |
Your mission is to analyze the provided business table image and generate Question-Answer (QA) pairs that fit the specified Reasoning Type definitions.
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a business tone and accurately handle revenue, profit margins, growth rates, market share, and employee performance metrics.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -59,6 +59,7 @@ generate_qa_from_image: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google). Use fictional names like "A사", "B기업", "가나다 주식회사".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -75,7 +76,6 @@ generate_qa_from_image: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "What is the name of the Branch Manager of the branch with the #1 Revenue?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the most profitable project?" -> implies Profit Margin column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is Q4 performance?" -> implies continuation from Q1-Q3 context)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Management Goals' or 'Market Conditions' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -87,7 +87,59 @@ generate_qa_from_image: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
+ ]
+ }}
+ Return ONLY the JSON object.
+
+generate_qa_from_multi_image: |
+ You are an 'AI Data Researcher' specialized in building high-quality QA datasets that require understanding MULTIPLE table images together.
+ Your mission is to analyze ALL provided business table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple tables.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate to each other (e.g., same company different periods, different departments, related metrics).
+ 2. **Generate Cross-Image QA**: Create 9 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Each QA pair must correspond to exactly one Reasoning Type.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google). Use fictional names like "A사", "B기업", "가나다 주식회사".
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what is being compared or combined?
+
+ ### [Cross-Image Reasoning Type Definitions (Business Domain)]
+ (1) cross_lookup: Retrieve and combine specific values from different images. (e.g., "What is the total Q1 revenue of Branch A from both Table 1 and Table 2?")
+ (2) cross_filter: Filter rows across tables based on conditions spanning multiple images. (e.g., "Which departments appear in both tables and have positive profit margins in both?")
+ (3) cross_aggregate: Aggregate data spanning multiple images. (e.g., "What is the combined total revenue across all branches shown in both images?")
+ (4) cross_compare: Compare values or trends between different images. (e.g., "Which table shows a higher average profit margin, Table 1 or Table 2?")
+ (5) cross_arithmetic: Calculate differences, ratios, or changes using data from multiple images. (e.g., "What is the revenue growth rate from the Q1 table to the Q2 table for Branch A?")
+ (6) cross_temporal: Identify trends or changes by combining time-series data from multiple images. (e.g., "Combining both annual reports, which department showed continuous growth?")
+ (7) cross_multi_hop: Multi-step inference requiring lookups across images. (e.g., "Find the top performer in Image 1, then find their metrics in Image 2.")
+ (8) cross_implicit: Answer questions requiring implicit understanding of relationships between images. (e.g., "Which region improved the most?" when improvement requires comparing two period tables)
+ (9) cross_synthesis: Synthesize insights that are only possible by viewing all images together. (e.g., "Based on both the budget table and the results table, which projects exceeded their targets?")
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "context": null,
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (One per Reasoning Type => Total 9)
]
}}
Return ONLY the JSON object.
@@ -112,7 +164,8 @@ generate_synthetic_table: |
3. **⚠️ Data Transformation - ABSOLUTELY MANDATORY ⚠️:**
- **ALL data cell values MUST be replaced with completely new synthetic values.**
- **NEVER copy any original data values** - generate fresh, realistic alternatives.
- - For company/team names: Generate DIFFERENT names (e.g., "A팀" → "B팀")
+ - **NEVER use real company/brand names** (Samsung, Apple, Google, 현대, LG, etc.). Use fictional names like "A사", "가나다 기업", "XYZ Corp".
+ - For company/team names: Generate DIFFERENT fictional names (e.g., "A팀" → "B팀")
- For employee names: Generate DIFFERENT Korean names (e.g., "김OO" → "박OO")
- For business metrics: Generate DIFFERENT numbers (e.g., "100억" → "150억")
- For strategy/description text: Write DIFFERENT content with similar structure
@@ -156,8 +209,9 @@ generate_synthetic_table_from_image: |
3. **⚠️ Data Generation - ABSOLUTELY CRITICAL ⚠️:**
- **NEVER copy the data values from the image** - this is NOT an OCR task
- **ALL cell content must be completely NEW and DIFFERENT from the original**
+ - **NEVER use real company/brand names** (Samsung, Apple, Google, 현대, LG, etc.). Use fictional names like "A사", "가나다 기업", "XYZ Corp".
- Generate COMPLETELY NEW synthetic business values for all data cells:
- * For company/team names: Generate DIFFERENT names (e.g., "A팀" → "B팀")
+ * For company/team names: Generate DIFFERENT fictional names (e.g., "A팀" → "B팀")
* For business metrics: Generate DIFFERENT numbers (e.g., "100억" → "150억")
* For strategy/description text: Write DIFFERENT content with similar structure
* For bullet point items: Create DIFFERENT but domain-appropriate items
@@ -181,3 +235,40 @@ generate_synthetic_table_from_image: |
⚠️ If the generated content is identical or very similar to the image, the output is INVALID.
Remember: The output should be a new synthetic business dataset, not a transcription of the original.
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for business tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic business context** (e.g., "Management Goals", "Market Conditions", "Strategic Guidelines") that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google). Use fictional names.
+
+ ### [Example Scenarios (Business)]
+ - Context describes a target market condition → Question asks which products/departments meet the criteria
+ - Context outlines budget constraints → Question asks which projects are feasible
+ - Context specifies performance thresholds → Question asks which teams qualify
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "경영 목표에 따르면... (2-4 sentences of business context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/generate_synthetic_table/prompts/default.yaml b/generate_synthetic_table/prompts/default.yaml
index 0ca2645..4940e0a 100644
--- a/generate_synthetic_table/prompts/default.yaml
+++ b/generate_synthetic_table/prompts/default.yaml
@@ -77,6 +77,52 @@ generate_qa_from_image: |
Return ONLY the JSON object, no additional text.
+generate_qa_from_multi_image: |
+ You are an expert in creating educational and reasoning questions from tabular data.
+ Your task is to analyze ALL provided table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple tables.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate to each other.
+ 2. **Generate Cross-Image QA**: Create 5 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real company/institution names**. Use fictional names.
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what is being compared or combined?
+
+ ### [Cross-Image Reasoning Types]
+ - **cross_lookup**: Retrieve and combine specific values from different images.
+ - **cross_compare**: Compare values or trends between different images.
+ - **cross_aggregate**: Aggregate data spanning multiple images.
+ - **cross_arithmetic**: Calculate using data from multiple images.
+ - **cross_synthesis**: Synthesize insights only possible by viewing all images together.
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (Total 5 cross-image QA pairs)
+ ]
+ }}
+ Return ONLY the JSON object.
+
generate_synthetic_table: |
You are a Synthetic Data Generator specialized in creating completely NEW data while preserving table structure.
@@ -329,3 +375,40 @@ validate_parsed_table: |
Return a JSON object with the following keys:
- "valid": boolean (true if valid, false otherwise)
- "reason": string (brief explanation of the decision)
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic context paragraph** (e.g., guidelines, criteria, conditions) that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real company/institution names**. Use fictional names.
+
+ ### [Example Scenarios]
+ - Context describes selection criteria → Question asks which items meet the criteria
+ - Context outlines rules/thresholds → Question asks which entries qualify
+ - Context specifies conditions → Question asks for items matching those conditions
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "조건에 따르면... (2-4 sentences of context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/generate_synthetic_table/prompts/finance.yaml b/generate_synthetic_table/prompts/finance.yaml
index 77d9927..6e700e0 100644
--- a/generate_synthetic_table/prompts/finance.yaml
+++ b/generate_synthetic_table/prompts/finance.yaml
@@ -6,7 +6,7 @@ generate_qa: |
{synthetic_html}
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a financial professional tone and accurately handle stock prices, financial statements (Assets, Liabilities, Equity), investment metrics (PER, PBR, ROE), interest rates, and exchange rates.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -14,6 +14,7 @@ generate_qa: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google, 현대, SK). Use fictional names like "A사", "B기업", "가나다 주식회사".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -21,7 +22,7 @@ generate_qa: |
- Is the question clear and unambiguous? (e.g., "Most undervalued stock" -> "Stock with lowest PER")
### [Reasoning Type Definitions (Finance Domain)]
- (1) lookup: Retrieve specific stock price or financial figures without condition/calculation. (e.g., "What is Samsung Electronics' 2023 dividend?")
+ (1) lookup: Retrieve specific stock price or financial figures without condition/calculation. (e.g., "What is Company A's 2023 dividend?")
(2) filter: Select rows/columns meeting specific conditions (Market Cap cap, specific sector). (e.g., "List all companies with Debt Ratio under 100%.")
(3) aggregate: Statistical aggregation of portfolio or time-series data using Sum/Avg etc. (e.g., "What is the total valuation of held stocks?")
(4) compare: Compare financial health between companies or investment metrics. (e.g., "Which company has a higher ROE, Company A or B?")
@@ -30,7 +31,6 @@ generate_qa: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the largest shareholder of the company with #1 Market Cap?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which stock has strong dividend tendency?" -> implies Dividend Yield column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is Net Income?" -> implies continuation from Operating Profit column context)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Market Outlook' or 'Investment Strategy' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -42,7 +42,7 @@ generate_qa: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
]
}}
@@ -51,7 +51,7 @@ generate_qa_from_image: |
Your mission is to analyze the provided financial table image and generate Question-Answer (QA) pairs that fit the specified Reasoning Type definitions.
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a financial professional tone and accurately handle stock prices, financial statements (Assets, Liabilities, Equity), investment metrics (PER, PBR, ROE), interest rates, and exchange rates.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -59,6 +59,7 @@ generate_qa_from_image: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google, 현대, SK). Use fictional names like "A사", "B기업", "가나다 주식회사".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -66,7 +67,7 @@ generate_qa_from_image: |
- Is the question clear and unambiguous? (e.g., "Most undervalued stock" -> "Stock with lowest PER")
### [Reasoning Type Definitions (Finance Domain)]
- (1) lookup: Retrieve specific stock price or financial figures without condition/calculation. (e.g., "What is Samsung Electronics' 2023 dividend?")
+ (1) lookup: Retrieve specific stock price or financial figures without condition/calculation. (e.g., "What is Company A's 2023 dividend?")
(2) filter: Select rows/columns meeting specific conditions (Market Cap cap, specific sector). (e.g., "List all companies with Debt Ratio under 100%.")
(3) aggregate: Statistical aggregation of portfolio or time-series data using Sum/Avg etc. (e.g., "What is the total valuation of held stocks?")
(4) compare: Compare financial health between companies or investment metrics. (e.g., "Which company has a higher ROE, Company A or B?")
@@ -75,7 +76,6 @@ generate_qa_from_image: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the largest shareholder of the company with #1 Market Cap?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which stock has strong dividend tendency?" -> implies Dividend Yield column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is Net Income?" -> implies continuation from Operating Profit column context)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Market Outlook' or 'Investment Strategy' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -87,7 +87,59 @@ generate_qa_from_image: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
+ ]
+ }}
+ Return ONLY the JSON object.
+
+generate_qa_from_multi_image: |
+ You are an 'AI Data Researcher' specialized in building high-quality QA datasets that require understanding MULTIPLE financial table images together.
+ Your mission is to analyze ALL provided financial table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple financial statements or reports.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate (e.g., different fiscal periods, income statement vs balance sheet, different securities).
+ 2. **Generate Cross-Image QA**: Create 9 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Each QA pair must correspond to exactly one Reasoning Type.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real company/fund names** (e.g., Samsung, Apple, Vanguard). Use fictional names like "A사", "B펀드", "가나다증권".
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what is being compared or combined?
+
+ ### [Cross-Image Reasoning Type Definitions (Finance Domain)]
+ (1) cross_lookup: Retrieve and combine specific financial values from different statements. (e.g., "What are the combined total assets across the Q1 and Q2 balance sheets?")
+ (2) cross_filter: Filter entries across financial statements based on conditions. (e.g., "Which accounts show positive growth in both the income statement and cash flow statement?")
+ (3) cross_aggregate: Aggregate financial data spanning multiple periods or statements. (e.g., "What is the total revenue across all quarterly reports shown?")
+ (4) cross_compare: Compare financial ratios or metrics between different periods or portfolios. (e.g., "Did the debt-to-equity ratio improve from Table 1 to Table 2?")
+ (5) cross_arithmetic: Calculate financial metrics using data from multiple statements. (e.g., "Calculate the year-over-year revenue growth using data from both annual reports.")
+ (6) cross_temporal: Identify financial trends by combining multiple period data. (e.g., "Based on all quarterly statements, what is the profit margin trend?")
+ (7) cross_multi_hop: Multi-step financial inference across statements. (e.g., "Find the highest dividend stock in Image 1, then find its P/E ratio in Image 2.")
+ (8) cross_implicit: Answer questions that require understanding the relationships between financial statements. (e.g., "Which company is more leveraged?" requires comparing debt from multiple sources)
+ (9) cross_synthesis: Synthesize financial insights only possible by viewing all statements together. (e.g., "Based on both income and cash flow statements, which segments are cash-generative?")
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple financial images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "context": null,
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (One per Reasoning Type => Total 9)
]
}}
Return ONLY the JSON object.
@@ -111,7 +163,8 @@ generate_synthetic_table: |
3. **⚠️ Data Transformation - ABSOLUTELY MANDATORY ⚠️:**
- **ALL data cell values MUST be replaced with completely new synthetic values.**
- **NEVER copy any original data values** - generate fresh, realistic alternatives.
- - For company names: Generate DIFFERENT names (e.g., "A회사" → "B회사")
+ - **NEVER use real company/brand names** (Samsung, Apple, Google, 현대, SK, LG, etc.). Use fictional names like "A사", "가나다 기업", "XYZ Corp".
+ - For company names: Generate DIFFERENT fictional names (e.g., "A회사" → "B회사")
- For financial figures: Generate DIFFERENT amounts (similar magnitude, different values)
- For percentages/ratios: Generate DIFFERENT metrics
- For dates: Generate DIFFERENT plausible dates
@@ -150,7 +203,8 @@ generate_synthetic_table_from_image: |
3. **⚠️ Data Generation - ABSOLUTELY CRITICAL ⚠️:**
- **NEVER copy the data values from the image** - this is NOT an OCR task
- **ALL cell content must be completely NEW and DIFFERENT**
- - For company names: Generate DIFFERENT names
+ - **NEVER use real company/brand names** (Samsung, Apple, Google, 현대, SK, LG, etc.). Use fictional names like "A사", "가나다 기업", "XYZ Corp".
+ - For company names: Generate DIFFERENT fictional names
- For financial figures: Generate DIFFERENT amounts
- For percentages/ratios: Generate DIFFERENT metrics
4. **Styling:** Use **Tailwind CSS** classes exclusively (NO inline styles).
@@ -169,3 +223,40 @@ generate_synthetic_table_from_image: |
- Amount in image: "50억" → Generate: "80억"
⚠️ If the generated content is identical or very similar to the image, the output is INVALID.
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for financial tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic financial context** (e.g., "Market Outlook", "Investment Strategy", "Risk Guidelines") that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real company names** (e.g., Samsung, Apple, Google). Use fictional names.
+
+ ### [Example Scenarios (Finance)]
+ - Context describes investment criteria (PER < 15, ROE > 10%) → Question asks which stocks qualify
+ - Context outlines risk tolerance levels → Question asks which portfolio allocation is appropriate
+ - Context specifies sector preferences → Question asks which companies match the strategy
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "투자 전략에 따르면... (2-4 sentences of financial context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/generate_synthetic_table/prompts/medical.yaml b/generate_synthetic_table/prompts/medical.yaml
index 7bf995c..eb16745 100644
--- a/generate_synthetic_table/prompts/medical.yaml
+++ b/generate_synthetic_table/prompts/medical.yaml
@@ -6,7 +6,7 @@ generate_qa: |
{synthetic_html}
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain medical professionalism and accurately handle patient vital signs, diagnosis names, medication dosages, lab values, and prognosis.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -15,6 +15,7 @@ generate_qa: |
- **Privacy**: Assume patient names/IDs are pseudonymized synthetic data.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "환자ID-001".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -22,7 +23,7 @@ generate_qa: |
- Is the question clear and unambiguous? (e.g., "Patient in bad condition" -> "Patient with systolic BP under 90mmHg")
### [Reasoning Type Definitions (Medical Domain)]
- (1) lookup: Retrieve specific patient lab results or medication info without condition/calculation. (e.g., "What is the blood glucose level of Patient ID-101?")
+ (1) lookup: Retrieve specific patient lab results or medication info without condition/calculation. (e.g., "What is the blood glucose level of Patient ID-001?")
(2) filter: Select rows/columns meeting specific conditions (abnormal range, specific disease). (e.g., "List all patients with temperature above 38°C.")
(3) aggregate: Statistical aggregation of patient group data (Mean LOS, Prevalence). (e.g., "What is the average age of patients in Ward A?")
(4) compare: Compare efficacy between treatment groups or patient status pre/post. (e.g., "Is cholesterol level lower post-medication than pre-medication?")
@@ -31,7 +32,6 @@ generate_qa: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the attending physician of the patient prescribed the highest dosage?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which group is at risk of hypertension?" -> implies Systolic/Diastolic BP columns)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is the 2nd test result?" -> implies column next to 1st test)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Clinical Protocol' or 'Exclusion Criteria' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -43,7 +43,7 @@ generate_qa: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
]
}}
@@ -52,7 +52,7 @@ generate_qa_from_image: |
Your mission is to analyze the provided medical table image and generate Question-Answer (QA) pairs that fit the specified Reasoning Type definitions.
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain medical professionalism and accurately handle patient vital signs, diagnosis names, medication dosages, lab values, and prognosis.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -60,6 +60,7 @@ generate_qa_from_image: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English.
+ - **DO NOT use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "환자ID-001".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -67,7 +68,7 @@ generate_qa_from_image: |
- Is the question clear and unambiguous? (e.g., "Patient in bad condition" -> "Patient with systolic BP under 90mmHg")
### [Reasoning Type Definitions (Medical Domain)]
- (1) lookup: Retrieve specific patient lab results or medication info without condition/calculation. (e.g., "What is the blood glucose level of Patient ID-101?")
+ (1) lookup: Retrieve specific patient lab results or medication info without condition/calculation. (e.g., "What is the blood glucose level of Patient ID-001?")
(2) filter: Select rows/columns meeting specific conditions (abnormal range, specific disease). (e.g., "List all patients with temperature above 38°C.")
(3) aggregate: Statistical aggregation of patient group data (Mean LOS, Prevalence). (e.g., "What is the average age of patients in Ward A?")
(4) compare: Compare efficacy between treatment groups or patient status pre/post. (e.g., "Is cholesterol level lower post-medication than pre-medication?")
@@ -76,7 +77,6 @@ generate_qa_from_image: |
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the attending physician of the patient prescribed the highest dosage?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which group is at risk of hypertension?" -> implies Systolic/Diastolic BP columns)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is the 2nd test result?" -> implies column next to 1st test)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Clinical Protocol' or 'Exclusion Criteria' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -88,7 +88,59 @@ generate_qa_from_image: |
"reasoning_annotation": "Step-by-step logic to derive answer",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
+ ]
+ }}
+ Return ONLY the JSON object.
+
+generate_qa_from_multi_image: |
+ You are an 'AI Data Researcher' specialized in building high-quality QA datasets that require understanding MULTIPLE medical/clinical table images together.
+ Your mission is to analyze ALL provided medical table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple clinical records, lab results, or patient cohorts.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate (e.g., different time points, different patient groups, lab results vs vital signs).
+ 2. **Generate Cross-Image QA**: Create 9 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Each QA pair must correspond to exactly one Reasoning Type.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "환자ID-001".
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what clinical data is being compared or combined?
+
+ ### [Cross-Image Reasoning Type Definitions (Medical Domain)]
+ (1) cross_lookup: Retrieve and combine patient data from different clinical records. (e.g., "What is the patient's blood glucose level before and after treatment from both tables?")
+ (2) cross_filter: Filter patients across clinical datasets based on conditions. (e.g., "Which patients appear in both the treatment and follow-up tables with improved vitals?")
+ (3) cross_aggregate: Aggregate clinical data spanning multiple patient cohorts. (e.g., "What is the average age of patients across both study groups?")
+ (4) cross_compare: Compare clinical outcomes between different time points or treatment groups. (e.g., "Did the treatment group in Table 2 show better outcomes than the control in Table 1?")
+ (5) cross_arithmetic: Calculate clinical metrics using data from multiple records. (e.g., "What is the change in BMI from the baseline table to the 6-month follow-up table?")
+ (6) cross_temporal: Identify patient progression by combining multiple visit records. (e.g., "Based on both admission and discharge tables, which patients showed improvement?")
+ (7) cross_multi_hop: Multi-step clinical inference across records. (e.g., "Find the patient with highest creatinine in Image 1, then find their blood pressure in Image 2.")
+ (8) cross_implicit: Answer questions requiring implicit understanding of clinical relationships. (e.g., "Which patients are at higher risk?" requires combining data from multiple clinical assessments)
+ (9) cross_synthesis: Synthesize clinical insights only possible by viewing all records together. (e.g., "Based on both lab results and medication tables, which patients may need dose adjustment?")
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple clinical images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "context": null,
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (One per Reasoning Type => Total 9)
]
}}
Return ONLY the JSON object.
@@ -112,9 +164,10 @@ generate_synthetic_table: |
3. **⚠️ Data Transformation - ABSOLUTELY MANDATORY ⚠️:**
- **ALL data cell values MUST be replaced with completely new synthetic values.**
- **NEVER copy any original data values** - generate fresh, realistic alternatives.
+ - **NEVER use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "제약사-가".
- For patient names/IDs: Generate DIFFERENT pseudonymized identifiers
- For lab values: Generate DIFFERENT realistic values
- - For diagnoses/medications: Generate DIFFERENT names and codes
+ - For diagnoses/medications: Generate DIFFERENT fictional names and codes
- For dates: Generate DIFFERENT plausible dates
4. **Styling:** Use **Tailwind CSS** classes (NO inline styles). **Observe and mimic the original image's visual style:**
- Look at the original image's color scheme and design
@@ -151,9 +204,10 @@ generate_synthetic_table_from_image: |
3. **⚠️ Data Generation - ABSOLUTELY CRITICAL ⚠️:**
- **NEVER copy the data values from the image** - this is NOT an OCR task
- **ALL cell content must be completely NEW and DIFFERENT**
+ - **NEVER use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "제약사-가".
- For patient names/IDs: Generate DIFFERENT pseudonymized identifiers
- For lab values: Generate DIFFERENT realistic values
- - For diagnoses/medications: Generate DIFFERENT names
+ - For diagnoses/medications: Generate DIFFERENT fictional names
4. **Styling:** Use **Tailwind CSS** classes exclusively (NO inline styles).
- `
`: `class="w-full border-collapse text-sm"`
- ``: `class="bg-gradient-to-r from-teal-700 to-teal-800 text-white"`
@@ -170,3 +224,40 @@ generate_synthetic_table_from_image: |
- Value in image: "수치A" → Generate: "수치B"
⚠️ If the generated content is identical or very similar to the image, the output is INVALID.
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for medical/clinical tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic clinical context** (e.g., "Clinical Protocol", "Exclusion Criteria", "Treatment Guidelines") that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real hospital/drug/institution names**. Use fictional names like "A병원", "약물-X", "제약사-가".
+
+ ### [Example Scenarios (Medical)]
+ - Context describes patient exclusion criteria (age, comorbidities) → Question asks which patients are eligible
+ - Context outlines dosage adjustment rules → Question asks which patients need dose modification
+ - Context specifies lab value thresholds for intervention → Question asks which patients require treatment
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "임상 프로토콜에 따르면... (2-4 sentences of clinical context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/generate_synthetic_table/prompts/public.yaml b/generate_synthetic_table/prompts/public.yaml
index b0c4099..4fc21e2 100644
--- a/generate_synthetic_table/prompts/public.yaml
+++ b/generate_synthetic_table/prompts/public.yaml
@@ -6,7 +6,7 @@ generate_qa: |
{synthetic_html}
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a public/objective tone and accurately handle demographics, budgets, administrative region names, policy beneficiaries, and annual indicators.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -14,6 +14,7 @@ generate_qa: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English and MUST be a single string (not a list).
+ - **DO NOT use real place names** (e.g., Seoul, Busan, Gyeonggi). Use fictional names like "A시", "나구", "다군".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -21,16 +22,15 @@ generate_qa: |
- Is the question clear and unambiguous? (e.g., "Place with most people" -> "District with highest Population")
### [Reasoning Type Definitions (Public Domain)]
- (1) lookup: Retrieve specific regional or annual statistics without condition/calculation. (e.g., "What is the total population of Seoul in 2023?")
+ (1) lookup: Retrieve specific regional or annual statistics without condition/calculation. (e.g., "What is the total population of Region A in 2023?")
(2) filter: Select rows/columns meeting specific conditions (above/below value, specific region). (e.g., "List all departments with budget execution rate over 90%.")
- (3) aggregate: Statistical aggregation of regional/annual data (Sum, Avg). (e.g., "What is the average number of births in the 17 provinces?")
- (4) compare: Compare regional gaps or annual trends. (e.g., "Which has higher incoming migration, Gyeonggi or Seoul?")
+ (3) aggregate: Statistical aggregation of regional/annual data (Sum, Avg). (e.g., "What is the average number of births in all provinces?")
+ (4) compare: Compare regional gaps or annual trends. (e.g., "Which has higher incoming migration, Region A or Region B?")
(5) arithmetic: Specific calculation beyond simple comparison (population density, YoY growth). (e.g., "What is the population density of District A?")
(6) temporal: Deduce policy effects or long-term statistical trends. (e.g., "Which region shows a decreasing crime rate trend over the last 5 years?")
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the Mayor of the city with the highest financial independence?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the most aged city?" -> implies 65+ Population Ratio column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is next year's target?" -> implies column next to this year's value)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Policy Guidelines' or 'Legal Requirements' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -42,7 +42,7 @@ generate_qa: |
"reasoning_annotation": "Step-by-step logic to derive answer (MUST be a string, not a list)",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
]
}}
@@ -51,7 +51,7 @@ generate_qa_from_image: |
Your mission is to analyze the provided public/administrative table image and generate Question-Answer (QA) pairs that fit the specified Reasoning Type definitions.
### [Instructions]
- 1. **Candidate Generation & Filtering**: Internally generate 10 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 10 pairs)
+ 1. **Candidate Generation & Filtering**: Internally generate 9 diverse QA pairs first. Then, for each Reasoning Type, select and output only the single most perfect QA pair that passes the [Validation Criteria]. (Total 9 pairs)
2. **Domain Suitability**: Questions must maintain a public/objective tone and accurately handle demographics, budgets, administrative region names, policy beneficiaries, and annual indicators.
3. **Strict Constraints**:
- Answers must be derived ONLY from the table (and provided context). No external knowledge.
@@ -59,6 +59,7 @@ generate_qa_from_image: |
- Output format must strictly follow JSON.
- Questions and Answers MUST be written in Korean.
- reasoning_annotation MUST be written in English.
+ - **DO NOT use real place names** (e.g., Seoul, Busan, Gyeonggi). Use fictional names like "A시", "나구", "다군".
### [Validation Criteria]
- Is the answer uniquely determined within the table?
@@ -66,16 +67,15 @@ generate_qa_from_image: |
- Is the question clear and unambiguous? (e.g., "Place with most people" -> "District with highest Population")
### [Reasoning Type Definitions (Public Domain)]
- (1) lookup: Retrieve specific regional or annual statistics without condition/calculation. (e.g., "What is the total population of Seoul in 2023?")
+ (1) lookup: Retrieve specific regional or annual statistics without condition/calculation. (e.g., "What is the total population of Region A in 2023?")
(2) filter: Select rows/columns meeting specific conditions (above/below value, specific region). (e.g., "List all departments with budget execution rate over 90%.")
- (3) aggregate: Statistical aggregation of regional/annual data (Sum, Avg). (e.g., "What is the average number of births in the 17 provinces?")
- (4) compare: Compare regional gaps or annual trends. (e.g., "Which has higher incoming migration, Gyeonggi or Seoul?")
+ (3) aggregate: Statistical aggregation of regional/annual data (Sum, Avg). (e.g., "What is the average number of births in all provinces?")
+ (4) compare: Compare regional gaps or annual trends. (e.g., "Which has higher incoming migration, Region A or Region B?")
(5) arithmetic: Specific calculation beyond simple comparison (population density, YoY growth). (e.g., "What is the population density of District A?")
(6) temporal: Deduce policy effects or long-term statistical trends. (e.g., "Which region shows a decreasing crime rate trend over the last 5 years?")
(7) multi_hop: Multi-step inference finding a value first, then using it as a key. (e.g., "Who is the Mayor of the city with the highest financial independence?")
(8) implicit_reference: Referring to specific metrics contextually without explicit column name. (e.g., "Which is the most aged city?" -> implies 65+ Population Ratio column)
(9) ellipsis: Recovering omitted comparisons or criteria from table structure. (e.g., "What is next year's target?" -> implies column next to this year's value)
- (10) long_sequence (Context-Dependent): Requires interpreting 'Policy Guidelines' or 'Legal Requirements' text (Context) to filter table data. **Requirement**: Must generate a hypothetical [Context] paragraph needed to solve the question.
### [Output Format (JSON)]
{{
@@ -87,7 +87,59 @@ generate_qa_from_image: |
"reasoning_annotation": "Step-by-step logic to derive answer",
"context": null
}},
- ... (One per Reasoning Type => Total 10)
+ ... (One per Reasoning Type => Total 9)
+ ]
+ }}
+ Return ONLY the JSON object.
+
+generate_qa_from_multi_image: |
+ You are an 'AI Data Researcher' specialized in building high-quality QA datasets that require understanding MULTIPLE public/government table images together.
+ Your mission is to analyze ALL provided public data table images and generate Question-Answer (QA) pairs that REQUIRE information from MULTIPLE images to answer.
+
+ **⚠️ CRITICAL REQUIREMENT: CROSS-IMAGE REASONING ⚠️**
+ - Every QA pair MUST require information from AT LEAST TWO images to answer correctly.
+ - Questions answerable from a single image are INVALID.
+ - Focus on comparisons, aggregations, or inferences that span multiple regional statistics, budget tables, or policy data.
+
+ ### [Instructions]
+ 1. **Analyze All Images**: First, understand what data each image contains and how they relate (e.g., different fiscal years, different regions, budget vs expenditure).
+ 2. **Generate Cross-Image QA**: Create 9 diverse QA pairs where each question requires synthesizing information from multiple images.
+ 3. **Strict Constraints**:
+ - Answers must be derived from combining data across images. No external knowledge.
+ - Each QA pair must correspond to exactly one Reasoning Type.
+ - Output format must strictly follow JSON.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English, MUST be a single string, and MUST specify which images were used.
+ - **DO NOT use real place names** (e.g., Seoul, Busan, Gyeonggi). Use fictional names like "A시", "나구", "다군".
+
+ ### [Validation Criteria]
+ - Does the answer REQUIRE data from multiple images? (Single-image answers are INVALID)
+ - Is the reasoning process logically flawless?
+ - Is the question clear about what public data is being compared or combined?
+
+ ### [Cross-Image Reasoning Type Definitions (Public Domain)]
+ (1) cross_lookup: Retrieve and combine regional statistics from different data tables. (e.g., "What is the total population of A시 combining both census tables?")
+ (2) cross_filter: Filter regions across datasets based on conditions. (e.g., "Which districts appear in both tables with budget execution rate >80%?")
+ (3) cross_aggregate: Aggregate public data spanning multiple regions or years. (e.g., "What is the total government expenditure across all departments in both fiscal year reports?")
+ (4) cross_compare: Compare regional performance between different periods or metrics. (e.g., "Did the unemployment rate improve from the 2022 table to the 2023 table for Region A?")
+ (5) cross_arithmetic: Calculate public metrics using data from multiple sources. (e.g., "What is the year-over-year population growth rate using data from both census tables?")
+ (6) cross_temporal: Identify policy trends by combining multiple year data. (e.g., "Based on both annual reports, which region shows consistent improvement in education metrics?")
+ (7) cross_multi_hop: Multi-step inference across public data tables. (e.g., "Find the region with highest tax revenue in Image 1, then find its population density in Image 2.")
+ (8) cross_implicit: Answer questions requiring implicit understanding of relationships between datasets. (e.g., "Which region is most fiscally efficient?" requires combining budget and outcome data from multiple sources)
+ (9) cross_synthesis: Synthesize policy insights only possible by viewing all tables together. (e.g., "Based on both the budget allocation and service satisfaction tables, which programs are underperforming?")
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring multiple public data images to answer",
+ "answer": "Answer derived from multiple images",
+ "type": "cross_lookup",
+ "reasoning_annotation": "Step 1: From Image 1, extract X. Step 2: From Image 2, extract Y. Step 3: Combine to get answer.",
+ "context": null,
+ "images_used": ["image_1", "image_2"]
+ }},
+ ... (One per Reasoning Type => Total 9)
]
}}
Return ONLY the JSON object.
@@ -111,7 +163,8 @@ generate_synthetic_table: |
3. **⚠️ Data Transformation - ABSOLUTELY MANDATORY ⚠️:**
- **ALL data cell values MUST be replaced with completely new synthetic values.**
- **NEVER copy any original data values** - generate fresh, realistic alternatives.
- - For regions: Generate DIFFERENT administrative region names
+ - **NEVER use real place names** (Seoul, Busan, Gyeonggi, etc.). Use fictional names like "A시", "나구", "다군", "라도".
+ - For regions: Generate DIFFERENT fictional administrative region names
- For departments: Generate DIFFERENT department names
- For statistics: Generate DIFFERENT numbers (similar magnitude)
- For dates: Generate DIFFERENT plausible dates
@@ -150,7 +203,8 @@ generate_synthetic_table_from_image: |
3. **⚠️ Data Generation - ABSOLUTELY CRITICAL ⚠️:**
- **NEVER copy the data values from the image** - this is NOT an OCR task
- **ALL cell content must be completely NEW and DIFFERENT**
- - For regions: Generate DIFFERENT administrative region names
+ - **NEVER use real place names** (Seoul, Busan, Gyeonggi, etc.). Use fictional names like "A시", "나구", "다군", "라도".
+ - For regions: Generate DIFFERENT fictional administrative region names
- For statistics: Generate DIFFERENT numbers
- For departments: Generate DIFFERENT names
4. **Styling:** Use **Tailwind CSS** classes exclusively (NO inline styles).
@@ -169,3 +223,40 @@ generate_synthetic_table_from_image: |
- Statistic in image: "수치A" → Generate: "수치B"
⚠️ If the generated content is identical or very similar to the image, the output is INVALID.
+
+generate_long_sequence: |
+ You are an 'AI Data Researcher' specialized in creating context-dependent QA pairs for public/government data tables.
+ Your mission is to generate a single high-quality "long_sequence" type QA pair that requires interpreting external context to answer questions about the table.
+
+ **Input Table:**
+ {synthetic_html}
+
+ ### [Instructions]
+ 1. **Generate ONE long_sequence QA pair** that requires reading and understanding a context paragraph to filter or interpret the table data.
+ 2. **Create a realistic public policy context** (e.g., "Policy Guidelines", "Legal Requirements", "Budget Allocation Rules") that provides information needed to answer the question.
+ 3. **The question must be unanswerable without the context** - the context should contain key criteria or conditions.
+ 4. **Strict Constraints**:
+ - Answer must be derived from BOTH the table AND the context. Neither alone is sufficient.
+ - Questions and Answers MUST be written in Korean.
+ - reasoning_annotation MUST be written in English and MUST be a single string.
+ - Context must be written in Korean and be 2-4 sentences long.
+ - **DO NOT use real place names** (e.g., Seoul, Busan, Gyeonggi). Use fictional names like "A시", "나구", "다군".
+
+ ### [Example Scenarios (Public)]
+ - Context describes eligibility criteria for a subsidy → Question asks which regions qualify
+ - Context outlines budget allocation rules → Question asks which departments receive funding
+ - Context specifies demographic thresholds → Question asks which areas need intervention
+
+ ### [Output Format (JSON)]
+ {{
+ "qa_pairs": [
+ {{
+ "question": "Question requiring context to answer",
+ "answer": "Answer derived from table + context",
+ "type": "long_sequence",
+ "reasoning_annotation": "Step 1: Extract key criteria from context. Step 2: Apply criteria to table. Step 3: Derive answer.",
+ "context": "정책 지침에 따르면... (2-4 sentences of policy context in Korean)"
+ }}
+ ]
+ }}
+ Return ONLY the JSON object.
diff --git a/multi_image_json_list/test_academic_input.json b/multi_image_json_list/test_academic_input.json
new file mode 100755
index 0000000..478bf45
--- /dev/null
+++ b/multi_image_json_list/test_academic_input.json
@@ -0,0 +1,192 @@
+[
+ {
+ "index": 0,
+ "pair_id": "A_origin_0_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_0.png",
+ "data/Academic/Table/A_origin_0/A_table_1.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 1,
+ "pair_id": "A_origin_0_1",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_2.png",
+ "data/Academic/Table/A_origin_0/A_table_3.png",
+ "data/Academic/Table/A_origin_0/A_table_4.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 2,
+ "pair_id": "A_origin_1_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_5.png",
+ "data/Academic/Table/A_origin_1/A_table_6.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 3,
+ "pair_id": "A_origin_2_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_2/A_table_8.png",
+ "data/Academic/Table/A_origin_2/A_table_9.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 4,
+ "pair_id": "A_origin_3_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_3/A_table_10.png",
+ "data/Academic/Table/A_origin_3/A_table_11.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 5,
+ "pair_id": "A_origin_4_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_4/A_table_12.png",
+ "data/Academic/Table/A_origin_4/A_table_13.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 6,
+ "pair_id": "A_origin_5_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_5/A_table_14.png",
+ "data/Academic/Table/A_origin_5/A_table_15.png",
+ "data/Academic/Table/A_origin_5/A_table_16.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 7,
+ "pair_id": "A_origin_6_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_6/A_table_17.png",
+ "data/Academic/Table/A_origin_6/A_table_18.png",
+ "data/Academic/Table/A_origin_6/A_table_19.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 8,
+ "pair_id": "A_origin_7_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_7/A_table_20.png",
+ "data/Academic/Table/A_origin_7/A_table_21.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 9,
+ "pair_id": "A_origin_8_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_8/A_table_22.png",
+ "data/Academic/Table/A_origin_8/A_table_23.png",
+ "data/Academic/Table/A_origin_8/A_table_24.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 10,
+ "pair_id": "A_origin_9_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_9/A_table_25.png",
+ "data/Academic/Table/A_origin_9/A_table_26.png",
+ "data/Academic/Table/A_origin_9/A_table_27.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 11,
+ "pair_id": "A_origin_10_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_10/A_table_28.png",
+ "data/Academic/Table/A_origin_10/A_table_29.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 12,
+ "pair_id": "A_origin_11_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_11/A_table_30.png",
+ "data/Academic/Table/A_origin_11/A_table_31.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 13,
+ "pair_id": "A_origin_12_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_12/A_table_32.png",
+ "data/Academic/Table/A_origin_12/A_table_33.png",
+ "data/Academic/Table/A_origin_12/A_table_34.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 14,
+ "pair_id": "A_origin_14_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_14/A_table_39.png",
+ "data/Academic/Table/A_origin_14/A_table_40.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 15,
+ "pair_id": "A_origin_18_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_18/A_table_47.png",
+ "data/Academic/Table/A_origin_18/A_table_48.png",
+ "data/Academic/Table/A_origin_18/A_table_49.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 16,
+ "pair_id": "A_origin_26_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_26/A_table_59_01.png",
+ "data/Academic/Table/A_origin_26/A_table_59_02.png",
+ "data/Academic/Table/A_origin_26/A_table_60.png",
+ "data/Academic/Table/A_origin_26/A_table_61.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 17,
+ "pair_id": "A_origin_28_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_28/A_table_63_01.png",
+ "data/Academic/Table/A_origin_28/A_table_63_02.png",
+ "data/Academic/Table/A_origin_28/A_table_64.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 18,
+ "pair_id": "A_origin_36_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_36/A_table_73.png",
+ "data/Academic/Table/A_origin_36/A_table_75.png"
+ ],
+ "domain": "academic"
+ },
+ {
+ "index": 19,
+ "pair_id": "A_origin_43_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_43/A_table_89.png",
+ "data/Academic/Table/A_origin_43/A_table_90.png"
+ ],
+ "domain": "academic"
+ }
+]
\ No newline at end of file
diff --git a/multi_image_json_list/test_business_input.json b/multi_image_json_list/test_business_input.json
new file mode 100755
index 0000000..ace8a1f
--- /dev/null
+++ b/multi_image_json_list/test_business_input.json
@@ -0,0 +1,173 @@
+[
+ {
+ "index": 0,
+ "pair_id": "B_origin_0_0_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_0/B_table_0_0.png",
+ "data/Business/Table/B_origin_0/B_table_1_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 1,
+ "pair_id": "B_origin_3_3_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_3/B_table_10_0.png",
+ "data/Business/Table/B_origin_3/B_table_11_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 2,
+ "pair_id": "B_origin_4_4_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_16_0.png",
+ "data/Business/Table/B_origin_4/B_table_16_1.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 3,
+ "pair_id": "B_origin_4_4_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_14_0.png",
+ "data/Business/Table/B_origin_4/B_table_15_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 4,
+ "pair_id": "B_origin_6_6_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_6/B_table_20_0.png",
+ "data/Business/Table/B_origin_6/B_table_21_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 5,
+ "pair_id": "B_origin_2_10_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_6_0.png",
+ "data/Business/Table/B_origin_6/B_table_23_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 7,
+ "pair_id": "B_origin_14_14_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_45_0.png",
+ "data/Business/Table/B_origin_14/B_table_45_1.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 8,
+ "pair_id": "B_origin_15_15_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_51_0.png",
+ "data/Business/Table/B_origin_15/B_table_51_1.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 9,
+ "pair_id": "B_origin_10_15_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_10/B_table_33_0.png",
+ "data/Business/Table/B_origin_15/B_table_52_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 10,
+ "pair_id": "B_origin_18_18_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_63_0.png",
+ "data/Business/Table/B_origin_18/B_table_63_1.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 11,
+ "pair_id": "B_origin_18_18_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_61_0.png",
+ "data/Business/Table/B_origin_18/B_table_64_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 12,
+ "pair_id": "B_origin_20_20_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_20/B_table_68_0.png",
+ "data/Business/Table/B_origin_20/B_table_69_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 13,
+ "pair_id": "B_origin_21_21_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_70_0.png",
+ "data/Business/Table/B_origin_21/B_table_71_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 14,
+ "pair_id": "B_origin_21_21_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_72_0.png",
+ "data/Business/Table/B_origin_21/B_table_72_1.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 15,
+ "pair_id": "B_origin_23_23_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_23/B_table_80_0.png",
+ "data/Business/Table/B_origin_23/B_table_81_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 16,
+ "pair_id": "B_origin_23_23_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_23/B_table_80_0.png",
+ "data/Business/Table/B_origin_23/B_table_81_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 17,
+ "pair_id": "B_origin_24_24_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_24/B_table_83_0.png",
+ "data/Business/Table/B_origin_24/B_table_84_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 18,
+ "pair_id": "B_origin_32_32_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_32/B_table_110_0.png",
+ "data/Business/Table/B_origin_32/B_table_112_0.png"
+ ],
+ "domain": "Business"
+ },
+ {
+ "index": 19,
+ "pair_id": "B_origin_37_37_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_132_0.png",
+ "data/Business/Table/B_origin_37/B_table_132_1.png"
+ ],
+ "domain": "Business"
+ }
+]
\ No newline at end of file
diff --git a/multi_image_json_list/test_finance_input.json b/multi_image_json_list/test_finance_input.json
new file mode 100755
index 0000000..98b7491
--- /dev/null
+++ b/multi_image_json_list/test_finance_input.json
@@ -0,0 +1,182 @@
+[
+ {
+ "index": 0,
+ "pair_id": "F_table_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_0.png",
+ "data/Finance/Table/F_origin_3/F_table_5_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 1,
+ "pair_id": "F_table_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_0.png",
+ "data/Finance/Table/F_origin_6/F_table_8_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 2,
+ "pair_id": "F_table_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_0.png",
+ "data/Finance/Table/F_origin_16/F_table_16_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 3,
+ "pair_id": "F_table_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_71_0.png",
+ "data/Finance/Table/F_origin_23/F_table_74_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 4,
+ "pair_id": "F_table_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_122_0.png",
+ "data/Finance/Table/F_origin_39/F_table_123_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 5,
+ "pair_id": "F_table_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_0.png",
+ "data/Finance/Table/F_origin_42/F_table_130_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 6,
+ "pair_id": "F_table_6",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_0.png",
+ "data/Finance/Table/F_origin_44/F_table_132_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 7,
+ "pair_id": "F_table_7",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_0.png",
+ "data/Finance/Table/F_origin_49/F_table_146_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 8,
+ "pair_id": "F_table_8",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_0.png",
+ "data/Finance/Table/F_origin_0/F_table_1_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 9,
+ "pair_id": "F_table_9",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_1.png",
+ "data/Finance/Table/F_origin_2/F_table_4_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 10,
+ "pair_id": "F_table_10",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_1.png",
+ "data/Finance/Table/F_origin_6/F_table_9_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 11,
+ "pair_id": "F_table_11",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_39_0.png",
+ "data/Finance/Table/F_origin_16/F_table_45_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 12,
+ "pair_id": "F_table_12",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_0.png",
+ "data/Finance/Table/F_origin_11/F_table_15_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 13,
+ "pair_id": "F_table_13",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_0.png",
+ "data/Finance/Table/F_origin_13/F_table_21_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 14,
+ "pair_id": "F_table_14",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_0.png",
+ "data/Finance/Table/F_origin_23/F_table_74_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 15,
+ "pair_id": "F_table_15",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_0.png",
+ "data/Finance/Table/F_origin_21/F_table_71_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 16,
+ "pair_id": "F_table_16",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_16_0.png",
+ "data/Finance/Table/F_origin_45/F_table_136_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 17,
+ "pair_id": "F_table_17",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_49_0.png",
+ "data/Finance/Table/F_origin_48/F_table_143_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 18,
+ "pair_id": "F_table_18",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_122_0.png",
+ "data/Finance/Table/F_origin_45/F_table_136_0.png"
+ ],
+ "domain": "finance"
+ },
+ {
+ "index": 19,
+ "pair_id": "F_table_19",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_0.png",
+ "data/Finance/Table/F_origin_49/F_table_146_0.png"
+ ],
+ "domain": "finance"
+ }
+]
\ No newline at end of file
diff --git a/multi_image_json_list/test_medical_input.json b/multi_image_json_list/test_medical_input.json
new file mode 100755
index 0000000..279ef70
--- /dev/null
+++ b/multi_image_json_list/test_medical_input.json
@@ -0,0 +1,164 @@
+[
+ {
+ "index": 0,
+ "pair_id": "M_origin_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_0_0_0.png",
+ "data/Medical/Table/M_table_0_1_0.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 1,
+ "pair_id": "M_origin_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_0_0.png",
+ "data/Medical/Table/M_table_2_1_0.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 2,
+ "pair_id": "M_origin_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_0_1.png",
+ "data/Medical/Table/M_table_3_0_2.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 3,
+ "pair_id": "M_origin_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_4_0_0.png",
+ "data/Medical/Table/M_table_4_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 4,
+ "pair_id": "M_revised_2_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_2_0.png",
+ "data/Medical/Table/M_table_2_2_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 5,
+ "pair_id": "M_revised_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_0_0.png",
+ "data/Medical/Table/M_table_3_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 6,
+ "pair_id": "M_revised_4_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_4_0_0.png",
+ "data/Medical/Table/M_table_4_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 7,
+ "pair_id": "M_revised_6_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_3_0.png",
+ "data/Medical/Table/M_table_6_3_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 8,
+ "pair_id": "M_revised_8_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_0_0.png",
+ "data/Medical/Table/M_table_8_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 9,
+ "pair_id": "M_revised_9_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_0_0.png",
+ "data/Medical/Table/M_table_9_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 10,
+ "pair_id": "M_revised_10_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_0_0.png",
+ "data/Medical/Table/M_table_10_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 11,
+ "pair_id": "M_revised_11_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_0.png",
+ "data/Medical/Table/M_table_11_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 12,
+ "pair_id": "M_revised_13_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_0_0.png",
+ "data/Medical/Table/M_table_13_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 13,
+ "pair_id": "M_revised_14_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_0_0.png",
+ "data/Medical/Table/M_table_14_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 14,
+ "pair_id": "M_revised_15_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_0_0.png",
+ "data/Medical/Table/M_table_15_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 15,
+ "pair_id": "M_revised_16_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_16_0_0.png",
+ "data/Medical/Table/M_table_16_0_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 16,
+ "pair_id": "M_revised_2_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_3_0.png",
+ "data/Medical/Table/M_table_2_3_1.png"
+ ],
+ "domain": "medical"
+ },
+ {
+ "index": 17,
+ "pair_id": "M_revised_10_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_1_0.png",
+ "data/Medical/Table/M_table_10_1_1.png"
+ ],
+ "domain": "medical"
+ }
+]
\ No newline at end of file
diff --git a/multi_image_json_list/test_public_input.json b/multi_image_json_list/test_public_input.json
new file mode 100755
index 0000000..832ef44
--- /dev/null
+++ b/multi_image_json_list/test_public_input.json
@@ -0,0 +1,182 @@
+[
+ {
+ "index": 0,
+ "pair_id": "P_origin_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_0.png",
+ "data/Public/Table/P_origin_0/P_origin_0_1_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 1,
+ "pair_id": "P_origin_0_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_2_1.png",
+ "data/Public/Table/P_origin_0/P_origin_0_2_2.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 2,
+ "pair_id": "P_origin_1_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_9_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_9_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 3,
+ "pair_id": "P_origin_1_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_10_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_10_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 4,
+ "pair_id": "P_origin_1_12",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_12_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_12_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 5,
+ "pair_id": "P_origin_1_16",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_16_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_16_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 6,
+ "pair_id": "P_origin_1_17",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_17_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_17_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 7,
+ "pair_id": "P_origin_1_18",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_18_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_18_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 8,
+ "pair_id": "P_origin_1_23",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_23_0.png",
+ "data/Public/Table/P_origin_1/P_origin_1_23_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 9,
+ "pair_id": "P_origin_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_0_0.png",
+ "data/Public/Table/P_origin_2/P_origin_2_0_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 10,
+ "pair_id": "P_origin_3_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_3/P_origin_3_2_0.png",
+ "data/Public/Table/P_origin_3/P_origin_3_2_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 11,
+ "pair_id": "P_origin_4_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_9_0.png",
+ "data/Public/Table/P_origin_4/P_origin_4_9_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 12,
+ "pair_id": "P_origin_4_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_11_0.png",
+ "data/Public/Table/P_origin_4/P_origin_4_11_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 13,
+ "pair_id": "P_origin_5_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_1_0.png",
+ "data/Public/Table/P_origin_5/P_origin_5_1_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 14,
+ "pair_id": "P_origin_5_17",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_17_0.png",
+ "data/Public/Table/P_origin_5/P_origin_5_17_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 15,
+ "pair_id": "P_origin_6_12",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_12_0.png",
+ "data/Public/Table/P_origin_6/P_origin_6_12_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 16,
+ "pair_id": "P_origin_7_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_4_0.png",
+ "data/Public/Table/P_origin_7/P_origin_7_4_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 17,
+ "pair_id": "P_origin_7_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_8_0.png",
+ "data/Public/Table/P_origin_7/P_origin_7_8_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 18,
+ "pair_id": "P_origin_8_14",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_0.png",
+ "data/Public/Table/P_origin_8/P_origin_8_14_1.png"
+ ],
+ "domain": "public"
+ },
+ {
+ "index": 19,
+ "pair_id": "P_origin_9_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_0_1.png",
+ "data/Public/Table/P_origin_9/P_origin_9_0_2.png"
+ ],
+ "domain": "public"
+ }
+]
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 513cc81..8d5c460 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,4 +32,5 @@ dependencies = [
"pymongo>=4.6.1",
"langgraph-checkpoint-sqlite>=3.0.1",
"notion-client>=2.0.0",
+ "playwright>=1.57.0",
]
diff --git a/regenerate_qa.py b/regenerate_qa.py
new file mode 100755
index 0000000..008dac4
--- /dev/null
+++ b/regenerate_qa.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""
+기존 synthetic 테이블에서 QA를 재생성하는 스크립트.
+
+output_*/html/ 디렉토리의 HTML 파일을 직접 읽어서 새로운 QA pairs를 생성합니다.
+pipeline_output.json은 entry 목록과 결과 저장에만 사용됩니다.
+
+Usage:
+ # 특정 도메인 재생성
+ python regenerate_qa.py --domain business
+
+ # 여러 도메인 재생성
+ python regenerate_qa.py --domain business finance academic medical
+
+ # 모든 도메인 재생성 (output_public 제외)
+ python regenerate_qa.py --all
+
+ # 특정 provider/model 사용
+ python regenerate_qa.py --domain business --provider openai --model gpt-4o
+"""
+
+import argparse
+import json
+import logging
+import os
+import re
+import sys
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+
+from dotenv import load_dotenv
+
+load_dotenv()  # populate os.environ from a local .env file, if one exists
+
+# Put this script's directory first on sys.path so the project import below resolves
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+
+from generate_synthetic_table.flow import (
+    _load_prompt,
+    _call_llm,
+    robust_json_parse,
+)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# Output directory of each supported domain (joined onto project_root when used)
+DOMAIN_DIRS = {
+    "academic": "output_academic",
+    "business": "output_business",
+    "finance": "output_finance",
+    "medical": "output_medical",
+    # "public": "output_public",  # excluded
+}
+
+
+def get_llm_client(provider: str, model: str):
+    """Create the LangChain chat model for ``provider`` (temperature 0.7).
+
+    The API key comes from OPENAI_API_KEY, ANTHROPIC_API_KEY or GOOGLE_API_KEY.
+    Raises ValueError when ``provider`` is not one of the supported names.
+    """
+    # Each SDK is imported inside its branch so that only the package of the
+    # selected provider has to be installed.
+    if provider == "openai":
+        from langchain_openai import ChatOpenAI
+        return ChatOpenAI(
+            model=model, temperature=0.7, api_key=os.getenv("OPENAI_API_KEY")
+        )
+    if provider in ("claude", "anthropic"):
+        from langchain_anthropic import ChatAnthropic
+        return ChatAnthropic(
+            model=model, temperature=0.7, api_key=os.getenv("ANTHROPIC_API_KEY")
+        )
+    if provider in ("gemini", "google"):
+        from langchain_google_genai import ChatGoogleGenerativeAI
+        return ChatGoogleGenerativeAI(
+            model=model,
+            temperature=0.7,
+            google_api_key=os.getenv("GOOGLE_API_KEY"),
+        )
+    raise ValueError(f"Unknown provider: {provider}")
+
+
+def find_html_files(output_dir: Path, pair_id: str) -> List[Path]:
+    """
+    Return the sorted HTML files of ``pair_id`` found in ``output_dir/html/``.
+
+    Files are matched with the glob ``{pair_id}_table_*.html``,
+    e.g. B_origin_0_0_0_table_0.html, B_origin_0_0_0_table_1.html.
+    An empty list is returned when the html/ directory does not exist.
+    """
+    table_dir = output_dir / "html"
+
+    # No html/ directory means there is nothing to match.
+    if not table_dir.exists():
+        return []
+
+    matches = table_dir.glob(f"{pair_id}_table_*.html")
+    return sorted(matches)
+
+
+def read_html_files(html_files: List[Path]) -> List[str]:
+    """Return the stripped, non-empty text of every readable file, in input order."""
+    # Best effort: a file that cannot be read is logged as a warning and skipped.
+    contents: List[str] = []
+    for path in html_files:
+        try:
+            with open(path, "r", encoding="utf-8") as handle:
+                text = handle.read().strip()
+        except Exception as e:
+            logger.warning(f"Failed to read {path}: {e}")
+            continue
+        if text:
+            contents.append(text)
+
+    return contents
+
+
+def generate_qa_for_table(
+    llm,
+    synthetic_html: str,
+    domain: str,
+) -> List[Dict[str, Any]]:
+    """Generate QA pairs for one synthetic table using the "generate_qa" prompt.
+
+    Returns [] (after logging) when "qa_pairs" is missing or any step raises.
+    """
+    try:
+        template = _load_prompt("generate_qa", domain)
+        filled_prompt = template.format(synthetic_html=synthetic_html)
+        raw_reply, _usage = _call_llm(llm, filled_prompt, return_token_usage=True)
+        parsed = robust_json_parse(raw_reply)
+        if not parsed or "qa_pairs" not in parsed:
+            logger.warning("QA generation did not return valid qa_pairs")
+            return []
+        return parsed["qa_pairs"]
+    except Exception as e:
+        logger.error(f"Failed to generate QA: {e}")
+        return []
+
+
+def generate_long_sequence_for_table(
+    llm,
+    synthetic_html: str,
+    domain: str,
+) -> List[Dict[str, Any]]:
+    """Generate long_sequence QA pairs for one synthetic table ([] on any failure)."""
+    try:
+        try:
+            prompt_template = _load_prompt("generate_long_sequence", domain)
+        except ValueError:
+            # generate_long_sequence prompt not found: optional step, skip quietly
+            return []
+
+        prompt = prompt_template.format(synthetic_html=synthetic_html)
+        response_text, _ = _call_llm(llm, prompt, return_token_usage=True)
+        response_json = robust_json_parse(response_text)
+        if response_json and "qa_pairs" in response_json:
+            return response_json["qa_pairs"]
+        return []
+    except Exception as e:
+        # ValueErrors raised after the prompt was loaded (format(), parsing) land here too
+        logger.warning(f"Failed to generate long_sequence QA: {e}")
+        return []
+
+
+def regenerate_qa_for_entry(
+    llm,
+    entry: Dict[str, Any],
+    output_dir: Path,
+    domain: str,
+    include_long_sequence: bool = True,
+) -> Dict[str, Any]:
+    """
+    Regenerate the QA pairs of a single pipeline entry.
+
+    The entry's HTML tables are read straight from ``output_dir/html/``; QA is
+    generated for every table and the per-table results are concatenated.
+
+    Returns a shallow copy of ``entry`` with "qa_results", "qa_regenerated_at"
+    and "html_files_used" set, or ``entry`` itself when no HTML could be read.
+    """
+    pair_id = entry.get("pair_id", entry.get("name", "unknown"))
+
+    # Locate the tables; without any there is nothing to regenerate.
+    html_files = find_html_files(output_dir, pair_id)
+    if not html_files:
+        logger.warning(f"No HTML files found for {pair_id} in {output_dir}/html/")
+        return entry
+
+    synthetic_tables = read_html_files(html_files)
+    if not synthetic_tables:
+        logger.warning(f"Failed to read HTML files for {pair_id}")
+        return entry
+
+    file_names = [html_file.name for html_file in html_files]
+    logger.info(f" Found {len(html_files)} HTML files: {file_names}")
+
+    table_count = len(synthetic_tables)
+    collected: List[Dict[str, Any]] = []
+    for position, table_html in enumerate(synthetic_tables, start=1):
+        logger.info(f" Generating QA for table {position}/{table_count}")
+        # Standard QA first, then the optional long_sequence QA for the same table.
+        collected += generate_qa_for_table(llm, table_html, domain)
+        if not include_long_sequence:
+            continue
+        collected += generate_long_sequence_for_table(llm, table_html, domain)
+
+    # Leave the caller's entry untouched; annotate a copy instead.
+    refreshed = dict(entry)
+    refreshed.update(
+        qa_results=collected,
+        qa_regenerated_at=datetime.now().isoformat(),
+        html_files_used=file_names,
+    )
+    return refreshed
+
+
+def regenerate_qa_for_domain(
+    domain: str,
+    provider: str = "claude",
+    model: str = "claude-sonnet-4-5",
+    include_long_sequence: bool = True,
+    limit: Optional[int] = None,
+    dry_run: bool = False,
+) -> Dict[str, Any]:
+    """
+    Regenerate QA for the entries of one domain's pipeline_output.json.
+
+    ``limit`` restricts processing to the first entries; the remaining ones are
+    written back unchanged. ``dry_run`` only reports which entries have HTML files.
+    Raises ValueError for an unknown domain and FileNotFoundError when
+    pipeline_output.json is missing. Returns a summary dict for the domain.
+    """
+    domain_dir = DOMAIN_DIRS.get(domain)
+    if not domain_dir:
+        raise ValueError(f"Unknown domain: {domain}")
+
+    output_dir = project_root / domain_dir
+    pipeline_output_path = output_dir / "pipeline_output.json"
+    if not pipeline_output_path.exists():
+        raise FileNotFoundError(f"pipeline_output.json not found: {pipeline_output_path}")
+
+    # Load existing data; all_entries stays complete for the backup and the save.
+    with open(pipeline_output_path, "r", encoding="utf-8") as f:
+        all_entries = json.load(f)
+    logger.info(f"Loaded {len(all_entries)} entries from {pipeline_output_path}")
+
+    data = all_entries
+    if limit:
+        data = all_entries[:limit]
+        logger.info(f"Limited to {limit} entries")
+
+    if dry_run:
+        logger.info("Dry run mode - not regenerating QA")
+        # Check whether the HTML directory exists at all
+        html_dir = output_dir / "html"
+        if not html_dir.exists():
+            logger.warning(f"HTML directory not found: {html_dir}")
+            return {"domain": domain, "entries": len(data), "dry_run": True, "html_dir_exists": False}
+
+        # Count the HTML files available for each entry
+        entries_with_html = 0
+        total_html_files = 0
+        for entry in data:
+            pair_id = entry.get("pair_id", entry.get("name", ""))
+            html_files = find_html_files(output_dir, pair_id)
+            if html_files:
+                entries_with_html += 1
+                total_html_files += len(html_files)
+            logger.info(f" {pair_id}: {len(html_files)} HTML files")
+
+        logger.info(f"Summary: {entries_with_html}/{len(data)} entries have HTML files ({total_html_files} total)")
+        return {
+            "domain": domain,
+            "entries": len(data),
+            "entries_with_html": entries_with_html,
+            "total_html_files": total_html_files,
+            "dry_run": True,
+        }
+
+    # Create LLM client
+    llm = get_llm_client(provider, model)
+    # Regenerate QA for each entry
+    updated_data = []
+    success_count = 0
+    error_count = 0
+    for i, entry in enumerate(data):
+        pair_id = entry.get("pair_id", entry.get("name", f"entry_{i}"))
+        logger.info(f"[{i + 1}/{len(data)}] Processing: {pair_id}")
+        try:
+            updated_entry = regenerate_qa_for_entry(
+                llm, entry, output_dir, domain, include_long_sequence=include_long_sequence
+            )
+            updated_data.append(updated_entry)
+            qa_count = len(updated_entry.get("qa_results", []))
+            logger.info(f" Generated {qa_count} QA pairs")
+            success_count += 1
+        except Exception as e:
+            logger.error(f" Failed: {e}")
+            updated_data.append(entry)  # Keep original
+            error_count += 1
+
+    # Entries beyond ``limit`` were not processed: carry them over unchanged so
+    # that saving below does not truncate pipeline_output.json.
+    updated_data.extend(all_entries[len(data):])
+
+    # Backup original file (the complete list, not just the limited slice)
+    backup_path = output_dir / f"pipeline_output_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    with open(backup_path, "w", encoding="utf-8") as f:
+        json.dump(all_entries, f, ensure_ascii=False, indent=2)
+    logger.info(f"Backed up original to {backup_path}")
+
+    # Save updated data
+    with open(pipeline_output_path, "w", encoding="utf-8") as f:
+        json.dump(updated_data, f, ensure_ascii=False, indent=2)
+    logger.info(f"Saved updated data to {pipeline_output_path}")
+
+    return {
+        "domain": domain,
+        "total_entries": len(data),
+        "success": success_count,
+        "errors": error_count,
+        "backup": str(backup_path),
+    }
+
+
+def main():
+    """CLI entry point: parse arguments, regenerate QA per selected domain, print a summary."""
+    parser = argparse.ArgumentParser(
+        description="기존 synthetic 테이블에서 QA를 재생성합니다.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+ # 특정 도메인 재생성
+ python regenerate_qa.py --domain business
+
+ # 여러 도메인 재생성
+ python regenerate_qa.py --domain business finance
+
+ # 모든 도메인 재생성 (output_public 제외)
+ python regenerate_qa.py --all
+
+ # OpenAI 사용
+ python regenerate_qa.py --domain business --provider openai --model gpt-4o
+
+ # 테스트 (5개만)
+ python regenerate_qa.py --domain business --limit 5
+
+ # Dry run (실제 재생성 없이 확인만)
+ python regenerate_qa.py --domain business --dry-run
+ """
+    )
+    parser.add_argument(
+        "--domain",
+        nargs="+",
+        choices=list(DOMAIN_DIRS.keys()),
+        help="재생성할 도메인(들)",
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="모든 도메인 재생성 (output_public 제외)",
+    )
+    parser.add_argument(
+        "--provider",
+        default="claude",
+        choices=["claude", "anthropic", "openai", "gemini", "google"],
+        help="LLM 제공자 (default: claude)",
+    )
+    parser.add_argument(
+        "--model",
+        default="claude-sonnet-4-5",
+        help="모델 이름 (default: claude-sonnet-4-5)",
+    )
+    parser.add_argument(
+        "--no-long-sequence",
+        action="store_true",
+        help="long_sequence QA 생성 스킵",
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        help="처리할 최대 entry 수 (테스트용)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="실제 재생성 없이 확인만",
+    )
+
+    args = parser.parse_args()
+
+    # Determine domains to process (--all takes precedence over --domain)
+    if args.all:
+        domains = list(DOMAIN_DIRS.keys())
+    elif args.domain:
+        domains = args.domain
+    else:
+        parser.error("--domain 또는 --all을 지정해야 합니다.")
+
+    logger.info(f"Domains to process: {domains}")
+    logger.info(f"Provider: {args.provider}, Model: {args.model}")
+
+    # Process each domain; a failing domain is recorded and the loop continues
+    results = []
+    for domain in domains:
+        logger.info(f"\n{'='*60}")
+        logger.info(f"Processing domain: {domain}")
+        logger.info(f"{'='*60}")
+
+        try:
+            result = regenerate_qa_for_domain(
+                domain=domain,
+                provider=args.provider,
+                model=args.model,
+                include_long_sequence=not args.no_long_sequence,
+                limit=args.limit,
+                dry_run=args.dry_run,
+            )
+            results.append(result)
+            logger.info(f"Completed: {result}")
+        except Exception as e:
+            logger.error(f"Failed to process {domain}: {e}")
+            results.append({"domain": domain, "error": str(e)})
+
+    # Summary: one line per domain, printed to stdout
+    print("\n" + "=" * 60)
+    print(" QA Regeneration Summary")
+    print("=" * 60)
+    for result in results:
+        domain = result.get("domain", "unknown")
+        if "error" in result:
+            print(f" {domain}: ERROR - {result['error']}")
+        elif result.get("dry_run"):
+            html_info = ""
+            if "entries_with_html" in result:
+                html_info = f", {result['entries_with_html']}/{result['entries']} with HTML ({result['total_html_files']} files)"
+            elif result.get("html_dir_exists") is False:
+                html_info = ", NO html/ directory!"
+            print(f" {domain}: {result.get('entries', 0)} entries (dry run){html_info}")
+        else:
+            print(f" {domain}: {result.get('success', 0)}/{result.get('total_entries', 0)} success, {result.get('errors', 0)} errors")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/run_all.sh b/run_all.sh
new file mode 100755
index 0000000..87ed09f
--- /dev/null
+++ b/run_all.sh
@@ -0,0 +1,536 @@
+#!/bin/bash
+
+# ==============================================================================
+# TableMagnifier - Master Pipeline Script
+# ==============================================================================
+#
+# 전체 파이프라인을 통합 실행합니다:
+# 1. Synthetic Table 생성 (from JSON input)
+# 2. HTML → Image 변환
+# 3. QA 재생성 (선택)
+# 4. QA 난이도 필터링 (vLLM 필요)
+# 5. 평가 (vLLM 필요)
+#
+# Usage:
+# ./run_all.sh --input data.json --domain business [OPTIONS]
+#
+# Examples:
+# # 기본 파이프라인 (테이블 생성 + 이미지 변환)
+# ./run_all.sh --input test.json --domain business
+#
+# # 전체 파이프라인 (vLLM 평가 포함)
+# ./run_all.sh --input test.json --domain business --with-eval --vllm-url http://localhost:8000/v1
+#
+# # QA 재생성만
+# ./run_all.sh --domain business --regenerate-qa-only
+#
+# # 필터링 + 평가만 (이미 테이블/이미지가 있는 경우)
+# ./run_all.sh --domain business --filter-only --with-eval
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# ==============================================================================
+# Configuration
+# ==============================================================================
+
+# Default values
+INPUT_JSON=""
+DOMAIN=""
+OUTPUT_DIR=""
+PROVIDER="claude"
+MODEL="claude-sonnet-4-5"
+VLLM_URL="http://localhost:8000/v1"
+
+# Pipeline steps (default: generate + capture)
+DO_GENERATE=true
+DO_CAPTURE=true
+DO_REGENERATE_QA=false
+DO_FILTER=false
+DO_EVAL=false
+
+# Options
+LIMIT=""
+DRY_RUN=false
+SKIP_QA=false
+FILTER_TRIALS=10
+FILTER_MIN_ACC=0.3
+FILTER_MAX_ACC=0.6
+
+# Colors
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+BLUE='\033[0;34m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+# ==============================================================================
+# Helper Functions
+# ==============================================================================
+
+echo_header() {  # Print title $1 in bold blue, framed by two heavy rules and preceded by a blank line.
+    local rule="${BLUE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+    printf '\n%b\n' "$rule"
+    printf '%b\n' "${BLUE}${BOLD} $1${NC}"
+    printf '%b\n' "$rule"
+}
+
+echo_step() {  # Announce step number $1 with title $2 in cyan, after a blank line, then a thin rule.
+    printf '\n'
+    printf '%b\n' "${CYAN}▶ STEP $1: $2${NC}"
+    printf '%b\n' "${CYAN}─────────────────────────────────────────────────────────────${NC}"
+}
+
+echo_info() {  # Print message $1 behind a green [INFO] tag.
+    printf '%b\n' "${GREEN}[INFO]${NC} $1"
+}
+
+echo_warn() {  # Print message $1 behind a yellow [WARN] tag.
+    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
+}
+
+echo_error() {  # Print message $1 behind a red [ERROR] tag (to stdout, like the other helpers).
+    printf '%b\n' "${RED}[ERROR]${NC} $1"
+}
+
+echo_success() {  # Print message $1 behind a green [SUCCESS] tag.
+    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
+}
+
+show_help() {
+ cat << 'EOF'
+Usage: ./run_all.sh [OPTIONS]
+
+TableMagnifier 전체 파이프라인을 통합 실행합니다.
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Required Options
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ --domain DOMAIN 도메인 (business, finance, academic, medical, public)
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Pipeline Steps
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ --input FILE 입력 JSON 파일 (테이블 생성 시 필수)
+ --regenerate-qa QA 재생성 포함
+ --regenerate-qa-only QA 재생성만 실행 (테이블 생성 스킵)
+ --with-filter vLLM으로 QA 난이도 필터링 포함
+ --filter-only 필터링만 실행 (테이블/이미지 생성 스킵)
+ --with-eval vLLM으로 평가 포함
+ --eval-only 평가만 실행
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Generation Options
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ --output-dir DIR 출력 디렉토리 (default: output_{domain})
+ --provider PROVIDER LLM 제공자: claude, openai, gemini (default: claude)
+ --model MODEL 모델 이름 (default: claude-sonnet-4-5)
+ --skip-qa 테이블 생성 시 QA 생성 스킵
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ vLLM Options (for filter/eval)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ --vllm-url URL vLLM 서버 URL (default: http://localhost:8000/v1)
+ --filter-trials N 필터링 시 QA당 시도 횟수 (default: 10)
+ --filter-min-acc FLOAT 필터링 최소 정확도 (default: 0.3)
+ --filter-max-acc FLOAT 필터링 최대 정확도 (default: 0.6)
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Other Options
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ --limit N 처리할 최대 entry 수 (테스트용)
+ --dry-run 실제 실행 없이 확인만
+ -h, --help 도움말 표시
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ Examples
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+ # 1. 기본 파이프라인 (테이블 생성 → 이미지 변환)
+ ./run_all.sh --input test.json --domain business
+
+ # 2. 전체 파이프라인 (생성 → 이미지 → 필터링 → 평가)
+ ./run_all.sh --input test.json --domain business --with-filter --with-eval
+
+ # 3. QA만 재생성 (기존 테이블 유지)
+ ./run_all.sh --domain business --regenerate-qa-only
+
+ # 4. 필터링만 (이미 이미지가 있는 경우)
+ ./run_all.sh --domain business --filter-only
+
+ # 5. 평가만
+ ./run_all.sh --domain business --eval-only
+
+ # 6. OpenAI 사용
+ ./run_all.sh --input test.json --domain business --provider openai --model gpt-4o
+
+ # 7. 테스트 (3개만)
+ ./run_all.sh --input test.json --domain business --limit 3 --dry-run
+
+EOF
+}
+
+# ==============================================================================
+# Argument Parsing
+# ==============================================================================
+
+# Parse first argument as JSON file if it ends with .json
+if [[ "$1" == *.json ]]; then
+ INPUT_JSON="$1"
+ shift
+fi
+
+while [[ $# -gt 0 ]]; do  # "${2?...}" reports a missing option value instead of `shift 2` dying silently under `set -e`
+    case $1 in
+        --input)
+            INPUT_JSON="${2?--input requires a value}"
+            shift 2
+            ;;
+        --domain)
+            DOMAIN="${2?--domain requires a value}"
+            shift 2
+            ;;
+        --output-dir)
+            OUTPUT_DIR="${2?--output-dir requires a value}"
+            shift 2
+            ;;
+        --provider)
+            PROVIDER="${2?--provider requires a value}"
+            shift 2
+            ;;
+        --model)
+            MODEL="${2?--model requires a value}"
+            shift 2
+            ;;
+        --vllm-url)
+            VLLM_URL="${2?--vllm-url requires a value}"
+            shift 2
+            ;;
+        --regenerate-qa)
+            DO_REGENERATE_QA=true
+            shift
+            ;;
+        --regenerate-qa-only)
+            DO_GENERATE=false
+            DO_CAPTURE=false
+            DO_REGENERATE_QA=true
+            shift
+            ;;
+        --with-filter)
+            DO_FILTER=true
+            shift
+            ;;
+        --filter-only)
+            DO_GENERATE=false
+            DO_CAPTURE=false
+            DO_FILTER=true
+            shift
+            ;;
+        --with-eval)
+            DO_EVAL=true
+            shift
+            ;;
+        --eval-only)
+            DO_GENERATE=false
+            DO_CAPTURE=false
+            DO_EVAL=true
+            shift
+            ;;
+        --filter-trials)
+            FILTER_TRIALS="${2?--filter-trials requires a value}"
+            shift 2
+            ;;
+        --filter-min-acc)
+            FILTER_MIN_ACC="${2?--filter-min-acc requires a value}"
+            shift 2
+            ;;
+        --filter-max-acc)
+            FILTER_MAX_ACC="${2?--filter-max-acc requires a value}"
+            shift 2
+            ;;
+        --skip-qa)
+            SKIP_QA=true
+            shift
+            ;;
+        --limit)
+            LIMIT="${2?--limit requires a value}"
+            shift 2
+            ;;
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo_error "Unknown option: $1"
+            echo "Use -h or --help for usage information."
+            exit 1
+            ;;
+    esac
+done
+
+# ==============================================================================
+# Validation
+# ==============================================================================
+
+# Domain is always required
+if [[ -z "$DOMAIN" ]]; then
+ echo_error "--domain is required"
+ echo "Use -h or --help for usage information."
+ exit 1
+fi
+
+# Input JSON required for generation
+if [[ "$DO_GENERATE" == true ]] && [[ -z "$INPUT_JSON" ]]; then
+ echo_error "--input is required for table generation"
+ echo "Use --regenerate-qa-only, --filter-only, or --eval-only to skip generation."
+ exit 1
+fi
+
+# Check input file exists
+if [[ -n "$INPUT_JSON" ]] && [[ ! -f "$INPUT_JSON" ]]; then
+ echo_error "Input file not found: $INPUT_JSON"
+ exit 1
+fi
+
+# Set default output directory
+if [[ -z "$OUTPUT_DIR" ]]; then
+ OUTPUT_DIR="output_${DOMAIN}"
+fi
+
+# ==============================================================================
+# Check Dependencies
+# ==============================================================================
+
+check_vllm_connection() {
+ if curl -s --connect-timeout 5 "${VLLM_URL}/models" > /dev/null 2>&1; then
+ VLLM_MODEL=$(curl -s "${VLLM_URL}/models" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['data'][0]['id'] if data.get('data') else 'unknown')" 2>/dev/null || echo "unknown")
+ echo_info "vLLM connected: ${VLLM_MODEL}"
+ return 0
+ else
+ return 1
+ fi
+}
+
+check_api_key() {
+    # Warn (never fail) when the API-key variable matching $PROVIDER is unset or empty; other providers are ignored.
+    local key_var
+    case $PROVIDER in
+        claude|anthropic)
+            key_var="ANTHROPIC_API_KEY"
+            ;;
+        openai)
+            key_var="OPENAI_API_KEY"
+            ;;
+        gemini|google)
+            key_var="GOOGLE_API_KEY"
+            ;;
+        *)
+            return 0
+            ;;
+    esac
+    [[ -n "${!key_var}" ]] || echo_warn "$key_var is not set"
+}
+
+# ==============================================================================
+# Main Pipeline
+# ==============================================================================
+
+echo_header "TableMagnifier - Master Pipeline"
+
+echo ""
+echo "Configuration:"
+echo " Domain: $DOMAIN"
+echo " Output Dir: $OUTPUT_DIR"
+echo " Provider: $PROVIDER"
+echo " Model: $MODEL"
+if [[ -n "$INPUT_JSON" ]]; then
+ echo " Input JSON: $INPUT_JSON"
+fi
+if [[ -n "$LIMIT" ]]; then
+ echo " Limit: $LIMIT entries"
+fi
+if [[ "$DRY_RUN" == true ]]; then
+ echo " Mode: DRY RUN"
+fi
+echo ""
+echo "Pipeline Steps:"
+echo " 1. Generate Tables: $([ "$DO_GENERATE" == true ] && echo "✓" || echo "✗")"
+echo " 2. Capture Images: $([ "$DO_CAPTURE" == true ] && echo "✓" || echo "✗")"
+echo " 3. Regenerate QA: $([ "$DO_REGENERATE_QA" == true ] && echo "✓" || echo "✗")"
+echo " 4. Filter QA: $([ "$DO_FILTER" == true ] && echo "✓" || echo "✗")"
+echo " 5. Evaluate: $([ "$DO_EVAL" == true ] && echo "✓" || echo "✗")"
+echo ""
+
+# Check API key for generation steps
+if [[ "$DO_GENERATE" == true ]] || [[ "$DO_REGENERATE_QA" == true ]]; then
+ check_api_key
+fi
+
+# Check vLLM for filter/eval steps
+if [[ "$DO_FILTER" == true ]] || [[ "$DO_EVAL" == true ]]; then
+ echo_info "Checking vLLM connection..."
+ if ! check_vllm_connection; then
+ echo_error "Cannot connect to vLLM server at ${VLLM_URL}"
+ echo_error "Please ensure vLLM server is running for filter/eval steps."
+ exit 1
+ fi
+fi
+
+STEP_NUM=0
+
+# ------------------------------------------------------------------------------
+# Step 1: Generate Synthetic Tables
+# ------------------------------------------------------------------------------
+if [[ "$DO_GENERATE" == true ]]; then
+ STEP_NUM=$((STEP_NUM + 1))
+ echo_step $STEP_NUM "Generate Synthetic Tables"
+
+ GENERATE_ARGS="--input \"$INPUT_JSON\" --output-dir \"$OUTPUT_DIR\" --provider \"$PROVIDER\" --model \"$MODEL\" --domain \"$DOMAIN\""
+
+ if [[ "$SKIP_QA" == true ]]; then
+ GENERATE_ARGS="$GENERATE_ARGS --skip-qa"
+ fi
+
+ if [[ -n "$LIMIT" ]]; then
+ GENERATE_ARGS="$GENERATE_ARGS --limit $LIMIT"
+ fi
+
+ if [[ "$DRY_RUN" == true ]]; then
+ echo_info "[DRY RUN] Would run: uv run python run_pipeline_json.py $GENERATE_ARGS"
+ else
+ eval "uv run python run_pipeline_json.py $GENERATE_ARGS"
+ echo_success "Table generation completed"
+ fi
+fi
+
+# ------------------------------------------------------------------------------
+# Step 2: Capture HTML to Images
+# The output directory is passed as one quoted word, so a path containing spaces is not split.
+if [[ "$DO_CAPTURE" == true ]]; then
+    STEP_NUM=$((STEP_NUM + 1))
+    echo_step $STEP_NUM "Capture HTML to Images"
+
+    CAPTURE_ARGS=(--output-dirs "$OUTPUT_DIR")
+
+    if [[ "$DRY_RUN" == true ]]; then
+        echo_info "[DRY RUN] Would run: uv run python capture_html_images.py ${CAPTURE_ARGS[*]}"
+    else
+        uv run python capture_html_images.py "${CAPTURE_ARGS[@]}"
+        echo_success "Image capture completed"
+    fi
+fi
+
+# ------------------------------------------------------------------------------
+# Step 3: Regenerate QA (Optional)
+# In dry-run mode regenerate_qa.py is still invoked, with --dry-run forwarded to it.
+if [[ "$DO_REGENERATE_QA" == true ]]; then
+    STEP_NUM=$((STEP_NUM + 1))
+    echo_step $STEP_NUM "Regenerate QA"
+
+    REGEN_ARGS=(--domain "$DOMAIN" --provider "$PROVIDER" --model "$MODEL")
+
+    if [[ -n "$LIMIT" ]]; then
+        REGEN_ARGS+=(--limit "$LIMIT")
+    fi
+
+    if [[ "$DRY_RUN" == true ]]; then
+        REGEN_ARGS+=(--dry-run)
+    fi
+
+    uv run python regenerate_qa.py "${REGEN_ARGS[@]}"
+    echo_success "QA regeneration completed"
+fi
+
+# ------------------------------------------------------------------------------
+# Step 4: Filter QA by Difficulty (Optional)
+# Arguments are kept in an array so each value reaches the script as exactly one word.
+if [[ "$DO_FILTER" == true ]]; then
+    STEP_NUM=$((STEP_NUM + 1))
+    echo_step $STEP_NUM "Filter QA by Difficulty"
+
+    FILTER_ARGS=(--domain "$DOMAIN" --vllm-url "$VLLM_URL" --trials "$FILTER_TRIALS" --min-acc "$FILTER_MIN_ACC" --max-acc "$FILTER_MAX_ACC")
+
+    if [[ -n "$LIMIT" ]]; then
+        FILTER_ARGS+=(--limit "$LIMIT")
+    fi
+
+    if [[ "$DRY_RUN" == true ]]; then
+        FILTER_ARGS+=(--dry-run)
+    fi
+
+    uv run python filter_qa_by_difficulty.py "${FILTER_ARGS[@]}"
+    echo_success "QA filtering completed"
+fi
+
+# ------------------------------------------------------------------------------
+# Step 5: Evaluate (Optional)
+# NOTE(review): --dry-run is forwarded as-is; confirm eval.evaluate_vllm accepts that flag.
+if [[ "$DO_EVAL" == true ]]; then
+    STEP_NUM=$((STEP_NUM + 1))
+    echo_step $STEP_NUM "Evaluate with vLLM"
+
+    EVAL_ARGS=(--domain "$DOMAIN" --vllm-url "$VLLM_URL")
+
+    if [[ -n "$LIMIT" ]]; then
+        EVAL_ARGS+=(--limit "$LIMIT")
+    fi
+
+    if [[ "$DRY_RUN" == true ]]; then
+        EVAL_ARGS+=(--dry-run)
+    fi
+
+    uv run python -m eval.evaluate_vllm "${EVAL_ARGS[@]}"
+    echo_success "Evaluation completed"
+fi
+
+# ==============================================================================
+# Summary
+# ==============================================================================
+
+echo_header "Pipeline Completed"
+
+echo ""
+echo "Output Directory: $OUTPUT_DIR/"
+echo ""
+echo "Generated Files:"
+
+if [[ -d "$OUTPUT_DIR" ]]; then
+    # Count artifacts; find errors (e.g. a missing html/ or images/ directory) are silenced.
+    JSON_COUNT=$(find "$OUTPUT_DIR" -maxdepth 1 -name "*.json" 2>/dev/null | wc -l)
+    HTML_COUNT=$(find "$OUTPUT_DIR/html" -name "*.html" 2>/dev/null | wc -l)
+    IMAGE_COUNT=$(find "$OUTPUT_DIR/images" -name "*.png" 2>/dev/null | wc -l)
+
+    echo " - JSON files: $JSON_COUNT"
+    echo " - HTML files: $HTML_COUNT (in html/)"
+    echo " - Images: $IMAGE_COUNT (in images/)"
+
+    if [[ "$DO_FILTER" == true ]] && [[ "$DRY_RUN" != true ]]; then
+        REVIEW_FILE=$(ls -t "$OUTPUT_DIR"/qa_for_review_*.json 2>/dev/null | head -1)
+        if [[ -n "$REVIEW_FILE" ]]; then
+            REVIEW_COUNT=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['count'])" "$REVIEW_FILE" 2>/dev/null || echo "?")  # path via argv, not spliced into Python source
+            echo ""
+            echo " Review File: $(basename "$REVIEW_FILE")"
+            echo " QA for Review: $REVIEW_COUNT items"
+        fi
+    fi
+
+    if [[ "$DO_EVAL" == true ]] && [[ "$DRY_RUN" != true ]]; then
+        EVAL_FILE=$(ls -t "$OUTPUT_DIR"/eval_results_*.json 2>/dev/null | head -1)
+        if [[ -n "$EVAL_FILE" ]]; then
+            echo ""
+            echo " Eval Results: $(basename "$EVAL_FILE")"
+        fi
+    fi
+fi
+
+echo ""
+echo -e "${GREEN}Done!${NC}"
diff --git a/run_capture_html.sh b/run_capture_html.sh
new file mode 100755
index 0000000..fb3a9df
--- /dev/null
+++ b/run_capture_html.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# ==============================================================================
+# HTML to Image Capture Script
+# Captures HTML files from output_*/html/ directories as PNG images
+# ==============================================================================
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "==================================="
+echo " HTML to Image Capture"
+echo "==================================="
+
+# Check if playwright is installed
+if ! uv run python -c "import playwright" 2>/dev/null; then
+ echo "[INFO] Installing playwright..."
+ uv add playwright
+ uv run playwright install chromium
+fi
+
+# Run the capture script
+uv run python "$SCRIPT_DIR/capture_html_images.py" "$@"
diff --git a/run_evaluate_vllm.sh b/run_evaluate_vllm.sh
new file mode 100755
index 0000000..762a672
--- /dev/null
+++ b/run_evaluate_vllm.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+#
+# vLLM 서버를 사용한 Table QA 평가 스크립트
+#
+# 사전 요구사항:
+# 1. vLLM 서버가 실행 중이어야 합니다
+# 2. HTML 파일들이 이미지로 캡처되어 있어야 합니다 (./run_capture_html.sh 실행)
+#
+# Usage:
+# ./run_evaluate_vllm.sh [OPTIONS]
+#
+# Examples:
+# # 모든 도메인 평가
+# ./run_evaluate_vllm.sh --all-domains
+#
+# # 단일 도메인 평가
+# ./run_evaluate_vllm.sh --domain public
+#
+# # 커스텀 vLLM URL
+# ./run_evaluate_vllm.sh --domain public --vllm-url http://gpu-server:8000/v1
+#
+# # 특정 모델 사용
+# ./run_evaluate_vllm.sh --domain business --model Qwen/Qwen2.5-VL-7B-Instruct
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# 기본 설정
+VLLM_URL="${VLLM_URL:-http://localhost:8000/v1}"
+MODEL="${MODEL:-default}"
+OUTPUT_DIR="${OUTPUT_DIR:-eval_results}"
+
+# 색상 정의
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo_info() {  # Print message $1 behind a green [INFO] tag.
+    printf '%b\n' "${GREEN}[INFO]${NC} $1"
+}
+
+echo_warn() {  # Print message $1 behind a yellow [WARN] tag.
+    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
+}
+
+echo_error() {  # Print message $1 behind a red [ERROR] tag.
+    printf '%b\n' "${RED}[ERROR]${NC} $1"
+}
+
+# 헬프 메시지
+show_help() {
+ cat << EOF
+Usage: $0 [OPTIONS]
+
+vLLM 서버를 사용한 Table QA 평가
+
+Options:
+ --domain DOMAIN 평가할 도메인 (academic, business, finance, medical, public)
+ --all-domains 모든 도메인 평가
+ --vllm-url URL vLLM 서버 URL (default: $VLLM_URL)
+ --model MODEL 사용할 모델 이름 (default: $MODEL)
+ --output-dir DIR 결과 저장 디렉토리 (default: $OUTPUT_DIR)
+ --use-judge LLM-as-Judge 평가 사용
+ --judge-model MODEL Judge 모델 (default: gpt-4o-mini)
+ --limit N 평가할 최대 샘플 수 (디버깅용)
+ --qa-types TYPES 특정 QA 타입만 평가 (예: "lookup compare")
+ --capture-html 평가 전 HTML을 이미지로 캡처
+ -h, --help 이 도움말 표시
+
+Environment Variables:
+ VLLM_URL vLLM 서버 URL
+ MODEL 사용할 모델 이름
+ OUTPUT_DIR 결과 저장 디렉토리
+ OPENAI_API_KEY LLM-as-Judge 사용 시 OpenAI API 키
+
+Examples:
+ # 모든 도메인 평가
+ $0 --all-domains
+
+ # public 도메인만 평가
+ $0 --domain public
+
+ # LLM-as-Judge 포함 평가
+ $0 --domain finance --use-judge --judge-model gpt-4o
+
+ # 10개 샘플로 빠른 테스트
+ $0 --domain public --limit 10
+EOF
+}
+
+# Check that the vLLM server at $1 is reachable; returns 0 on success, 1 otherwise.
+check_vllm_connection() {
+    local url=$1
+    echo_info "vLLM 서버 연결 확인: $url"
+
+    # Probe "$url/models"; only curl's exit status is used, the response body is discarded.
+    if ! curl -s --connect-timeout 5 "$url/models" > /dev/null 2>&1; then
+        echo_error "vLLM 서버에 연결할 수 없습니다: $url"
+        echo_error "vLLM 서버가 실행 중인지 확인하세요."
+        return 1
+    fi
+
+    echo_info "vLLM 서버 연결 성공"
+    return 0
+}
+
+# Warn about every domain whose output_<domain>/ exists but whose images/ directory is missing or empty.
+check_images() {
+    local domains=("academic" "business" "finance" "medical" "public")
+    local missing=0 domain
+    for domain in "${domains[@]}"; do
+        local output_dir="output_${domain}"
+        local images_dir="${output_dir}/images"
+
+        if [[ -d "$output_dir" ]]; then
+            if [[ ! -d "$images_dir" ]] || [[ -z "$(ls -A "$images_dir" 2>/dev/null)" ]]; then
+                echo_warn "$domain: 이미지 디렉토리가 비어있거나 없습니다 ($images_dir)"
+                # Not ((missing++)): that returns status 1 when missing is 0 and kills the script under `set -e`.
+                missing=$((missing + 1))
+            fi
+        fi
+    done
+
+    if [[ $missing -gt 0 ]]; then
+        echo_warn "일부 도메인에 이미지가 없습니다. HTML 캡처가 필요할 수 있습니다."
+        echo_warn "실행: ./run_capture_html.sh"
+    fi
+}
+
+# 인자 파싱
+DOMAIN=""
+ALL_DOMAINS=false
+CAPTURE_HTML=false
+USE_JUDGE=false
+JUDGE_MODEL=""
+LIMIT=""
+QA_TYPES=""
+
+while [[ $# -gt 0 ]]; do  # "${2?...}" reports a missing option value instead of `shift 2` dying silently under `set -e`
+    case $1 in
+        --domain)
+            DOMAIN="${2?--domain requires a value}"
+            shift 2
+            ;;
+        --all-domains)
+            ALL_DOMAINS=true
+            shift
+            ;;
+        --vllm-url)
+            VLLM_URL="${2?--vllm-url requires a value}"
+            shift 2
+            ;;
+        --model)
+            MODEL="${2?--model requires a value}"
+            shift 2
+            ;;
+        --output-dir)
+            OUTPUT_DIR="${2?--output-dir requires a value}"
+            shift 2
+            ;;
+        --use-judge)
+            USE_JUDGE=true
+            shift
+            ;;
+        --judge-model)
+            JUDGE_MODEL="${2?--judge-model requires a value}"
+            shift 2
+            ;;
+        --limit)
+            LIMIT="${2?--limit requires a value}"
+            shift 2
+            ;;
+        --qa-types)
+            QA_TYPES="${2?--qa-types requires a value}"
+            shift 2
+            ;;
+        --capture-html)
+            CAPTURE_HTML=true
+            shift
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo_error "알 수 없는 옵션: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# 인자 검증
+if [[ -z "$DOMAIN" ]] && [[ "$ALL_DOMAINS" != true ]]; then
+ echo_error "--domain 또는 --all-domains를 지정해야 합니다."
+ show_help
+ exit 1
+fi
+
+# HTML 캡처 (옵션)
+if [[ "$CAPTURE_HTML" == true ]]; then
+ echo_info "HTML 파일을 이미지로 캡처합니다..."
+ if [[ -f "./run_capture_html.sh" ]]; then
+ ./run_capture_html.sh
+ else
+ echo_warn "run_capture_html.sh를 찾을 수 없습니다. 스킵합니다."
+ fi
+fi
+
+# vLLM 연결 확인
+check_vllm_connection "$VLLM_URL" || exit 1
+
+# 이미지 확인
+check_images
+
+# 평가 명령어 구성
+CMD="uv run python -m eval.evaluate_vllm"
+CMD="$CMD --vllm-url $VLLM_URL"
+CMD="$CMD --model $MODEL"
+CMD="$CMD --output-dir $OUTPUT_DIR"
+
+if [[ "$ALL_DOMAINS" == true ]]; then
+ CMD="$CMD --all-domains"
+elif [[ -n "$DOMAIN" ]]; then
+ CMD="$CMD --domain $DOMAIN"
+fi
+
+if [[ "$USE_JUDGE" == true ]]; then
+ CMD="$CMD --use-judge"
+ if [[ -n "$JUDGE_MODEL" ]]; then
+ CMD="$CMD --judge-model $JUDGE_MODEL"
+ fi
+fi
+
+if [[ -n "$LIMIT" ]]; then
+ CMD="$CMD --limit $LIMIT"
+fi
+
+if [[ -n "$QA_TYPES" ]]; then
+ CMD="$CMD --qa-types $QA_TYPES"
+fi
+
+# 평가 실행
+echo_info "평가 시작..."
+echo_info "Command: $CMD"
+echo ""
+
+eval $CMD
+
+echo ""
+echo_info "평가 완료. 결과: $OUTPUT_DIR/"
diff --git a/run_filter_qa.sh b/run_filter_qa.sh
new file mode 100755
index 0000000..8ed106b
--- /dev/null
+++ b/run_filter_qa.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+
+# ==============================================================================
+# TableMagnifier - QA Difficulty Filtering
+# ==============================================================================
+#
+# vLLM 서버를 사용하여 QA 난이도를 측정하고 검수 대상을 필터링합니다.
+# 모델이 너무 쉽게 맞추는 문제(90%+)는 제외하고,
+# 적당한 난이도(30-60%)의 QA만 검수 리스트로 추출합니다.
+#
+# Usage:
+# ./run_filter_qa.sh [OPTIONS]
+#
+# Examples:
+# ./run_filter_qa.sh --domain business
+# ./run_filter_qa.sh --all --trials 5
+# ./run_filter_qa.sh --domain business --vllm-url http://gpu-server:8000/v1
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Colors
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m'
+
+echo_info() {  # Print message $1 behind a green [INFO] tag.
+    printf '%b\n' "${GREEN}[INFO]${NC} $1"
+}
+
+echo_warn() {  # Print message $1 behind a yellow [WARN] tag.
+    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
+}
+
+echo_error() {  # Print message $1 behind a red [ERROR] tag.
+    printf '%b\n' "${RED}[ERROR]${NC} $1"
+}
+
+show_help() {
+ cat << EOF
+Usage: $0 [OPTIONS]
+
+vLLM 서버를 사용하여 QA 난이도를 측정하고 검수 대상을 필터링합니다.
+
+Options:
+ --domain DOMAIN [...] 필터링할 도메인(들) (business, finance, academic, medical, public)
+ --all 모든 도메인 필터링
+ --vllm-url URL vLLM 서버 URL (default: http://localhost:8000/v1)
+ --model MODEL 모델 이름 (미지정시 자동 감지)
+ --trials N 각 QA당 시도 횟수 (default: 10)
+ --min-acc FLOAT 최소 정확도 (default: 0.3)
+ --max-acc FLOAT 최대 정확도 (default: 0.6)
+ --limit N 처리할 최대 entry 수 (테스트용)
+ --dry-run 실제 추론 없이 확인만
+ -h, --help 도움말 표시
+
+Difficulty Categories:
+ - too_easy: 90-100% (제외 - 모델이 다 맞춤)
+ - easy: 70-89%
+ - medium: 30-69% (검수 대상 ✓)
+ - hard: 1-29%
+ - very_hard: 0%
+
+Examples:
+ # business 도메인 필터링
+ $0 --domain business
+
+ # 빠른 테스트 (5회 시도, 2개 entry만)
+ $0 --domain business --trials 5 --limit 2
+
+ # 외부 vLLM 서버 사용
+ $0 --domain business --vllm-url http://gpu-server:8000/v1
+
+Output:
+ - qa_difficulty_analysis_*.json: 전체 분석 결과
+ - qa_for_review_*.json: 검수용 필터링된 QA 리스트
+EOF
+}
+
+# Check for help
+for arg in "$@"; do
+ if [[ "$arg" == "-h" ]] || [[ "$arg" == "--help" ]]; then
+ show_help
+ exit 0
+ fi
+done
+
+# Check for required arguments
+if [[ $# -eq 0 ]]; then
+ show_help
+ exit 1
+fi
+
+# Parse vllm-url for connection check
+VLLM_URL="http://localhost:8000/v1"
+for i in $(seq 1 $#); do
+ arg="${!i}"
+ if [[ "$arg" == "--vllm-url" ]]; then
+ next=$((i + 1))
+ VLLM_URL="${!next}"
+ break
+ fi
+done
+
+echo "=============================================="
+echo " TableMagnifier - QA Difficulty Filtering"
+echo "=============================================="
+echo ""
+
+# Check vLLM connection
+echo_info "Checking vLLM server connection..."
+if curl -s --connect-timeout 5 "${VLLM_URL}/models" > /dev/null 2>&1; then
+ MODEL_INFO=$(curl -s "${VLLM_URL}/models" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['data'][0]['id'] if data.get('data') else 'unknown')" 2>/dev/null || echo "unknown")
+ echo_info "vLLM server connected. Model: ${MODEL_INFO}"
+else
+ echo_error "Cannot connect to vLLM server at ${VLLM_URL}"
+ echo_error "Please ensure vLLM server is running."
+ exit 1
+fi
+
+echo ""
+echo_info "Starting QA difficulty filtering..."
+echo ""
+
+# Run the filter script
+uv run python filter_qa_by_difficulty.py "$@"
+
+echo ""
+echo_info "Filtering completed!"
+echo ""
+echo "Generated files:"
+echo " - qa_difficulty_analysis_*.json: Full analysis results"
+echo " - qa_for_review_*.json: Filtered QA for human review"
diff --git a/run_openai_public.sh b/run_openai_public.sh
deleted file mode 100644
index d90e1f6..0000000
--- a/run_openai_public.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-# ==============================================================================
-# TableMagnifier - JSON Pipeline (Public Domain)
-# ==============================================================================
-
-# Default Configuration
-INPUT_JSON="test_business.json"
-OUTPUT_DIR="output_business"
-DEFAULT_ARGS="--provider claude --model claude-sonnet-4-5 --domain business"
-
-# Check if the first argument is a JSON file path
-if [[ "$1" == *.json ]]; then
- INPUT_JSON="$1"
- shift
-fi
-
-echo "=============================================="
-echo " TableMagnifier - JSON Pipeline (Public)"
-echo "=============================================="
-echo "Input JSON: $INPUT_JSON"
-echo "Output Dir: $OUTPUT_DIR"
-echo "Provider: claude"
-echo "Model: claude-sonnet-4-5"
-echo "Domain: business"
-echo ""
-echo "💡 Tip: To upload to Notion during pipeline execution:"
-echo " Add --upload-to-notion flag to the command"
-echo ""
-echo "💡 To upload existing results later:"
-echo " python upload_to_notion_from_json.py $OUTPUT_DIR"
-echo ""
-
-# Check for ANTHROPIC_API_KEY
-if [[ -z "$ANTHROPIC_API_KEY" ]]; then
- echo "⚠️ Warning: ANTHROPIC_API_KEY is not set."
- echo " Please set it in your environment or .env file."
- echo ""
-fi
-
-# Run the pipeline
-# Note: "$@" appends any remaining arguments, allowing overrides of defaults
-uv run python run_pipeline_json.py --input "$INPUT_JSON" --output-dir "$OUTPUT_DIR" $DEFAULT_ARGS "$@"
\ No newline at end of file
diff --git a/run_pipeline.sh b/run_pipeline.sh
new file mode 100644
index 0000000..460929c
--- /dev/null
+++ b/run_pipeline.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+
+# ==============================================================================
+# TableMagnifier - JSON Pipeline
+# ==============================================================================
+#
+# Usage:
+# ./run_pipeline.sh [INPUT_JSON] [OPTIONS]
+#
+# Examples:
+# ./run_pipeline.sh test_public.json --domain public
+# ./run_pipeline.sh test_business.json --domain business --provider openai
+# ./run_pipeline.sh --input data.json --output-dir output_custom
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Default Configuration
+INPUT_JSON=""
+OUTPUT_DIR=""
+PROVIDER="claude"
+MODEL="claude-sonnet-4-5"
+DOMAIN="public"
+
+# Colors
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+show_help() {
+ cat << EOF
+Usage: $0 [INPUT_JSON] [OPTIONS]
+
+TableMagnifier JSON Pipeline Runner
+
+Arguments:
+ INPUT_JSON Input JSON file path (optional, can use --input instead)
+
+Options:
+ --input FILE Input JSON file
+ --output-dir DIR Output directory (default: output_{domain})
+ --provider PROVIDER LLM provider: claude, openai, gemini (default: claude)
+ --model MODEL Model name (default: claude-sonnet-4-5)
+ --domain DOMAIN Domain: public, business, finance, medical, academic (default: public)
+ --qa-only Generate QA only (skip table generation)
+ --skip-qa Skip QA generation (table only)
+ --upload-to-notion Upload results to Notion
+ -h, --help Show this help
+
+Examples:
+ # Public domain with Claude
+ $0 test_public.json --domain public
+
+ # Business domain with OpenAI
+ $0 test_business.json --domain business --provider openai --model gpt-4o
+
+ # Finance domain, QA only mode
+ $0 test_finance.json --domain finance --qa-only
+
+ # Custom output directory
+ $0 data.json --output-dir my_output --domain medical
+EOF
+}
+
+# Parse first argument as JSON file if it ends with .json
+if [[ "$1" == *.json ]]; then
+ INPUT_JSON="$1"
+ shift
+fi
+
+# Parse remaining arguments
+EXTRA_ARGS=""
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --input)
+ INPUT_JSON="$2"
+ shift 2
+ ;;
+ --output-dir)
+ OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ --provider)
+ PROVIDER="$2"
+ shift 2
+ ;;
+ --model)
+ MODEL="$2"
+ shift 2
+ ;;
+ --domain)
+ DOMAIN="$2"
+ shift 2
+ ;;
+ -h|--help)
+ show_help
+ exit 0
+ ;;
+ *)
+ EXTRA_ARGS="$EXTRA_ARGS $1"
+ shift
+ ;;
+ esac
+done
+
+# Set default output directory based on domain
+if [[ -z "$OUTPUT_DIR" ]]; then
+ OUTPUT_DIR="output_${DOMAIN}"
+fi
+
+# Validate input
+if [[ -z "$INPUT_JSON" ]]; then
+ echo -e "${YELLOW}[WARN]${NC} No input JSON specified."
+ show_help
+ exit 1
+fi
+
+if [[ ! -f "$INPUT_JSON" ]]; then
+ echo -e "${YELLOW}[ERROR]${NC} Input file not found: $INPUT_JSON"
+ exit 1
+fi
+
+echo "=============================================="
+echo " TableMagnifier - JSON Pipeline"
+echo "=============================================="
+echo "Input JSON: $INPUT_JSON"
+echo "Output Dir: $OUTPUT_DIR"
+echo "Provider: $PROVIDER"
+echo "Model: $MODEL"
+echo "Domain: $DOMAIN"
+echo ""
+
+# Check API keys based on provider
+case $PROVIDER in
+ claude|anthropic)
+ if [[ -z "$ANTHROPIC_API_KEY" ]]; then
+ echo -e "${YELLOW}[WARN]${NC} ANTHROPIC_API_KEY is not set."
+ fi
+ ;;
+ openai)
+ if [[ -z "$OPENAI_API_KEY" ]]; then
+ echo -e "${YELLOW}[WARN]${NC} OPENAI_API_KEY is not set."
+ fi
+ ;;
+ gemini|google)
+ if [[ -z "$GOOGLE_API_KEY" ]]; then
+ echo -e "${YELLOW}[WARN]${NC} GOOGLE_API_KEY is not set."
+ fi
+ ;;
+esac
+
+echo -e "${GREEN}[INFO]${NC} Starting pipeline..."
+echo ""
+
+# Run the pipeline
+uv run python run_pipeline_json.py \
+ --input "$INPUT_JSON" \
+ --output-dir "$OUTPUT_DIR" \
+ --provider "$PROVIDER" \
+ --model "$MODEL" \
+ --domain "$DOMAIN" \
+ $EXTRA_ARGS
+
+echo ""
+echo -e "${GREEN}[INFO]${NC} Pipeline completed. Results saved to: $OUTPUT_DIR/"
+echo ""
+echo "To upload results to Notion:"
+echo " python upload_to_notion_from_json.py $OUTPUT_DIR"
diff --git a/run_pipeline_json.py b/run_pipeline_json.py
index d8fef9c..b282e46 100644
--- a/run_pipeline_json.py
+++ b/run_pipeline_json.py
@@ -576,6 +576,7 @@ def main():
parser.add_argument("--max-workers", type=int, default=3, help="Maximum number of parallel workers (default: 3)")
parser.add_argument("--randomize-style", action="store_true", default=True, help="Randomize HTML table styles (fonts, colors) for diversity (default: True)")
parser.add_argument("--no-randomize-style", dest="randomize_style", action="store_false", help="Disable style randomization")
+ parser.add_argument("--limit", type=int, help="Limit number of entries to process (for testing)")
args = parser.parse_args()
@@ -596,6 +597,11 @@ def main():
print("Error: Input JSON must be a list of pairs.")
return
+ # Apply limit if specified
+ if args.limit:
+ input_data = input_data[:args.limit]
+ print(f"Limited to {len(input_data)} entries")
+
data_root = Path(args.data_root)
output_dir = Path(args.output_dir)
diff --git a/run_regenerate_qa.sh b/run_regenerate_qa.sh
new file mode 100755
index 0000000..58aa98d
--- /dev/null
+++ b/run_regenerate_qa.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+# ==============================================================================
+# TableMagnifier - QA Regeneration Script
+# ==============================================================================
+#
+# 기존 synthetic 테이블에서 QA를 재생성합니다.
+# output_public은 제외됩니다.
+#
+# Usage:
+# ./run_regenerate_qa.sh [OPTIONS]
+#
+# Examples:
+# ./run_regenerate_qa.sh --all # 모든 도메인
+# ./run_regenerate_qa.sh --domain business # 특정 도메인
+# ./run_regenerate_qa.sh --domain business finance # 여러 도메인
+#
+
+set -e  # exit on the first unhandled command failure
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"  # run from the directory that contains this script
+
+# ANSI color escape sequences; NC resets to the default color
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m'
+
+echo_info() {  # print $1 behind a green [INFO] tag
+    printf '%b\n' "${GREEN}[INFO]${NC} $1"
+}
+
+echo_warn() {  # print $1 behind a yellow [WARN] tag
+    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
+}
+
+echo_error() {  # print $1 behind a red [ERROR] tag
+    printf '%b\n' "${RED}[ERROR]${NC} $1"
+}
+
+show_help() {  # print usage text; the EOF delimiter is unquoted, so $0 expands to the script name as invoked
+ cat << EOF
+Usage: $0 [OPTIONS]
+
+기존 synthetic 테이블에서 QA를 재생성합니다.
+
+Options:
+ --domain DOMAIN [DOMAIN ...] 재생성할 도메인(들) (business, finance, academic, medical)
+ --all 모든 도메인 재생성 (output_public 제외)
+ --provider PROVIDER LLM 제공자 (claude, openai, gemini) (default: claude)
+ --model MODEL 모델 이름 (default: claude-sonnet-4-5)
+ --no-long-sequence long_sequence QA 생성 스킵
+ --limit N 처리할 최대 entry 수 (테스트용)
+ --dry-run 실제 재생성 없이 확인만
+ -h, --help 도움말 표시
+
+Examples:
+ # 모든 도메인 재생성
+ $0 --all
+
+ # business 도메인만
+ $0 --domain business
+
+ # OpenAI 사용
+ $0 --domain business --provider openai --model gpt-4o
+
+ # 테스트 (5개만)
+ $0 --domain business --limit 5
+
+ # Dry run
+ $0 --all --dry-run
+EOF
+}
+
+# Check for required arguments
+if [[ $# -eq 0 ]]; then
+ show_help
+ exit 1
+fi
+
+# Check for help flag
+for arg in "$@"; do
+ if [[ "$arg" == "-h" ]] || [[ "$arg" == "--help" ]]; then
+ show_help
+ exit 0
+ fi
+done
+
+# Check API keys
+check_api_keys() {
+ local provider="$1"
+ case $provider in
+ claude|anthropic)
+ if [[ -z "$ANTHROPIC_API_KEY" ]]; then
+ echo_warn "ANTHROPIC_API_KEY is not set"
+ fi
+ ;;
+ openai)
+ if [[ -z "$OPENAI_API_KEY" ]]; then
+ echo_warn "OPENAI_API_KEY is not set"
+ fi
+ ;;
+ gemini|google)
+ if [[ -z "$GOOGLE_API_KEY" ]]; then
+ echo_warn "GOOGLE_API_KEY is not set"
+ fi
+ ;;
+ esac
+}
+
+# Parse provider from arguments
+PROVIDER="claude"
+for i in "${!@}"; do
+ if [[ "${!i}" == "--provider" ]]; then
+ next=$((i + 1))
+ PROVIDER="${!next}"
+ break
+ fi
+done
+
+check_api_keys "$PROVIDER"
+
+echo "=============================================="
+echo " TableMagnifier - QA Regeneration"
+echo "=============================================="
+echo ""
+
+# Run the regeneration script
+uv run python regenerate_qa.py "$@"
diff --git a/single_image_json_list/single_table_academic_input.json b/single_image_json_list/single_table_academic_input.json
new file mode 100644
index 0000000..483318f
--- /dev/null
+++ b/single_image_json_list/single_table_academic_input.json
@@ -0,0 +1,1602 @@
+[
+ {
+ "index": 0,
+ "pair_id": "A_origin_0_A_table_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_0.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_0",
+ "table_id": "A_table_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "A_origin_0_A_table_1",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_1.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_0",
+ "table_id": "A_table_1"
+ },
+ {
+ "index": 2,
+ "pair_id": "A_origin_0_A_table_2",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_2.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_0",
+ "table_id": "A_table_2"
+ },
+ {
+ "index": 3,
+ "pair_id": "A_origin_0_A_table_3",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_3.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_0",
+ "table_id": "A_table_3"
+ },
+ {
+ "index": 4,
+ "pair_id": "A_origin_0_A_table_4",
+ "image_paths": [
+ "data/Academic/Table/A_origin_0/A_table_4.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_0",
+ "table_id": "A_table_4"
+ },
+ {
+ "index": 5,
+ "pair_id": "A_origin_1_A_table_5_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_5_0.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_5_0"
+ },
+ {
+ "index": 6,
+ "pair_id": "A_origin_1_A_table_5_1",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_5_1.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_5_1"
+ },
+ {
+ "index": 7,
+ "pair_id": "A_origin_1_A_table_6_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_6_0.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_6_0"
+ },
+ {
+ "index": 8,
+ "pair_id": "A_origin_1_A_table_6_1",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_6_1.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_6_1"
+ },
+ {
+ "index": 9,
+ "pair_id": "A_origin_1_A_table_7_0",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_7_0.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_7_0"
+ },
+ {
+ "index": 10,
+ "pair_id": "A_origin_1_A_table_7_1",
+ "image_paths": [
+ "data/Academic/Table/A_origin_1/A_table_7_1.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_1",
+ "table_id": "A_table_7_1"
+ },
+ {
+ "index": 11,
+ "pair_id": "A_origin_10_A_table_28",
+ "image_paths": [
+ "data/Academic/Table/A_origin_10/A_table_28.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_10",
+ "table_id": "A_table_28"
+ },
+ {
+ "index": 12,
+ "pair_id": "A_origin_10_A_table_29",
+ "image_paths": [
+ "data/Academic/Table/A_origin_10/A_table_29.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_10",
+ "table_id": "A_table_29"
+ },
+ {
+ "index": 13,
+ "pair_id": "A_origin_11_A_table_30",
+ "image_paths": [
+ "data/Academic/Table/A_origin_11/A_table_30.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_11",
+ "table_id": "A_table_30"
+ },
+ {
+ "index": 14,
+ "pair_id": "A_origin_11_A_table_31",
+ "image_paths": [
+ "data/Academic/Table/A_origin_11/A_table_31.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_11",
+ "table_id": "A_table_31"
+ },
+ {
+ "index": 15,
+ "pair_id": "A_origin_12_A_table_32",
+ "image_paths": [
+ "data/Academic/Table/A_origin_12/A_table_32.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_12",
+ "table_id": "A_table_32"
+ },
+ {
+ "index": 16,
+ "pair_id": "A_origin_12_A_table_33",
+ "image_paths": [
+ "data/Academic/Table/A_origin_12/A_table_33.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_12",
+ "table_id": "A_table_33"
+ },
+ {
+ "index": 17,
+ "pair_id": "A_origin_12_A_table_34",
+ "image_paths": [
+ "data/Academic/Table/A_origin_12/A_table_34.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_12",
+ "table_id": "A_table_34"
+ },
+ {
+ "index": 18,
+ "pair_id": "A_origin_13_A_table_35",
+ "image_paths": [
+ "data/Academic/Table/A_origin_13/A_table_35.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_13",
+ "table_id": "A_table_35"
+ },
+ {
+ "index": 19,
+ "pair_id": "A_origin_13_A_table_36",
+ "image_paths": [
+ "data/Academic/Table/A_origin_13/A_table_36.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_13",
+ "table_id": "A_table_36"
+ },
+ {
+ "index": 20,
+ "pair_id": "A_origin_13_A_table_37",
+ "image_paths": [
+ "data/Academic/Table/A_origin_13/A_table_37.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_13",
+ "table_id": "A_table_37"
+ },
+ {
+ "index": 21,
+ "pair_id": "A_origin_13_A_table_38",
+ "image_paths": [
+ "data/Academic/Table/A_origin_13/A_table_38.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_13",
+ "table_id": "A_table_38"
+ },
+ {
+ "index": 22,
+ "pair_id": "A_origin_14_A_table_39",
+ "image_paths": [
+ "data/Academic/Table/A_origin_14/A_table_39.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_14",
+ "table_id": "A_table_39"
+ },
+ {
+ "index": 23,
+ "pair_id": "A_origin_14_A_table_40",
+ "image_paths": [
+ "data/Academic/Table/A_origin_14/A_table_40.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_14",
+ "table_id": "A_table_40"
+ },
+ {
+ "index": 24,
+ "pair_id": "A_origin_15_A_table_41",
+ "image_paths": [
+ "data/Academic/Table/A_origin_15/A_table_41.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_15",
+ "table_id": "A_table_41"
+ },
+ {
+ "index": 25,
+ "pair_id": "A_origin_16_A_table_42",
+ "image_paths": [
+ "data/Academic/Table/A_origin_16/A_table_42.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_16",
+ "table_id": "A_table_42"
+ },
+ {
+ "index": 26,
+ "pair_id": "A_origin_16_A_table_43",
+ "image_paths": [
+ "data/Academic/Table/A_origin_16/A_table_43.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_16",
+ "table_id": "A_table_43"
+ },
+ {
+ "index": 27,
+ "pair_id": "A_origin_17_A_table_44",
+ "image_paths": [
+ "data/Academic/Table/A_origin_17/A_table_44.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_17",
+ "table_id": "A_table_44"
+ },
+ {
+ "index": 28,
+ "pair_id": "A_origin_17_A_table_45",
+ "image_paths": [
+ "data/Academic/Table/A_origin_17/A_table_45.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_17",
+ "table_id": "A_table_45"
+ },
+ {
+ "index": 29,
+ "pair_id": "A_origin_17_A_table_46",
+ "image_paths": [
+ "data/Academic/Table/A_origin_17/A_table_46.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_17",
+ "table_id": "A_table_46"
+ },
+ {
+ "index": 30,
+ "pair_id": "A_origin_18_A_table_47",
+ "image_paths": [
+ "data/Academic/Table/A_origin_18/A_table_47.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_18",
+ "table_id": "A_table_47"
+ },
+ {
+ "index": 31,
+ "pair_id": "A_origin_18_A_table_48",
+ "image_paths": [
+ "data/Academic/Table/A_origin_18/A_table_48.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_18",
+ "table_id": "A_table_48"
+ },
+ {
+ "index": 32,
+ "pair_id": "A_origin_18_A_table_49",
+ "image_paths": [
+ "data/Academic/Table/A_origin_18/A_table_49.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_18",
+ "table_id": "A_table_49"
+ },
+ {
+ "index": 33,
+ "pair_id": "A_origin_19_A_table_50",
+ "image_paths": [
+ "data/Academic/Table/A_origin_19/A_table_50.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_19",
+ "table_id": "A_table_50"
+ },
+ {
+ "index": 34,
+ "pair_id": "A_origin_2_A_table_8",
+ "image_paths": [
+ "data/Academic/Table/A_origin_2/A_table_8.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_2",
+ "table_id": "A_table_8"
+ },
+ {
+ "index": 35,
+ "pair_id": "A_origin_2_A_table_9",
+ "image_paths": [
+ "data/Academic/Table/A_origin_2/A_table_9.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_2",
+ "table_id": "A_table_9"
+ },
+ {
+ "index": 36,
+ "pair_id": "A_origin_20_A_table_51",
+ "image_paths": [
+ "data/Academic/Table/A_origin_20/A_table_51.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_20",
+ "table_id": "A_table_51"
+ },
+ {
+ "index": 37,
+ "pair_id": "A_origin_20_A_table_52",
+ "image_paths": [
+ "data/Academic/Table/A_origin_20/A_table_52.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_20",
+ "table_id": "A_table_52"
+ },
+ {
+ "index": 38,
+ "pair_id": "A_origin_21_A_table_53",
+ "image_paths": [
+ "data/Academic/Table/A_origin_21/A_table_53.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_21",
+ "table_id": "A_table_53"
+ },
+ {
+ "index": 39,
+ "pair_id": "A_origin_21_A_table_54",
+ "image_paths": [
+ "data/Academic/Table/A_origin_21/A_table_54.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_21",
+ "table_id": "A_table_54"
+ },
+ {
+ "index": 40,
+ "pair_id": "A_origin_22_A_table_55",
+ "image_paths": [
+ "data/Academic/Table/A_origin_22/A_table_55.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_22",
+ "table_id": "A_table_55"
+ },
+ {
+ "index": 41,
+ "pair_id": "A_origin_23_A_table_56",
+ "image_paths": [
+ "data/Academic/Table/A_origin_23/A_table_56.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_23",
+ "table_id": "A_table_56"
+ },
+ {
+ "index": 42,
+ "pair_id": "A_origin_24_A_table_57",
+ "image_paths": [
+ "data/Academic/Table/A_origin_24/A_table_57.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_24",
+ "table_id": "A_table_57"
+ },
+ {
+ "index": 43,
+ "pair_id": "A_origin_25_A_table_58",
+ "image_paths": [
+ "data/Academic/Table/A_origin_25/A_table_58.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_25",
+ "table_id": "A_table_58"
+ },
+ {
+ "index": 44,
+ "pair_id": "A_origin_26_A_table_59_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_26/A_table_59_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_26",
+ "table_id": "A_table_59_01"
+ },
+ {
+ "index": 45,
+ "pair_id": "A_origin_26_A_table_59_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_26/A_table_59_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_26",
+ "table_id": "A_table_59_02"
+ },
+ {
+ "index": 46,
+ "pair_id": "A_origin_26_A_table_60",
+ "image_paths": [
+ "data/Academic/Table/A_origin_26/A_table_60.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_26",
+ "table_id": "A_table_60"
+ },
+ {
+ "index": 47,
+ "pair_id": "A_origin_26_A_table_61",
+ "image_paths": [
+ "data/Academic/Table/A_origin_26/A_table_61.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_26",
+ "table_id": "A_table_61"
+ },
+ {
+ "index": 48,
+ "pair_id": "A_origin_27_A_table_62_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_27/A_table_62_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_27",
+ "table_id": "A_table_62_01"
+ },
+ {
+ "index": 49,
+ "pair_id": "A_origin_27_A_table_62_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_27/A_table_62_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_27",
+ "table_id": "A_table_62_02"
+ },
+ {
+ "index": 50,
+ "pair_id": "A_origin_28_A_table_63_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_28/A_table_63_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_28",
+ "table_id": "A_table_63_01"
+ },
+ {
+ "index": 51,
+ "pair_id": "A_origin_28_A_table_63_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_28/A_table_63_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_28",
+ "table_id": "A_table_63_02"
+ },
+ {
+ "index": 52,
+ "pair_id": "A_origin_28_A_table_64",
+ "image_paths": [
+ "data/Academic/Table/A_origin_28/A_table_64.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_28",
+ "table_id": "A_table_64"
+ },
+ {
+ "index": 53,
+ "pair_id": "A_origin_29_A_table_65",
+ "image_paths": [
+ "data/Academic/Table/A_origin_29/A_table_65.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_29",
+ "table_id": "A_table_65"
+ },
+ {
+ "index": 54,
+ "pair_id": "A_origin_3_A_table_10",
+ "image_paths": [
+ "data/Academic/Table/A_origin_3/A_table_10.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_3",
+ "table_id": "A_table_10"
+ },
+ {
+ "index": 55,
+ "pair_id": "A_origin_3_A_table_11",
+ "image_paths": [
+ "data/Academic/Table/A_origin_3/A_table_11.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_3",
+ "table_id": "A_table_11"
+ },
+ {
+ "index": 56,
+ "pair_id": "A_origin_30_A_table_66_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_30/A_table_66_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_30",
+ "table_id": "A_table_66_01"
+ },
+ {
+ "index": 57,
+ "pair_id": "A_origin_30_A_table_66_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_30/A_table_66_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_30",
+ "table_id": "A_table_66_02"
+ },
+ {
+ "index": 58,
+ "pair_id": "A_origin_31_A_table_67_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_31/A_table_67_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_31",
+ "table_id": "A_table_67_01"
+ },
+ {
+ "index": 59,
+ "pair_id": "A_origin_31_A_table_67_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_31/A_table_67_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_31",
+ "table_id": "A_table_67_02"
+ },
+ {
+ "index": 60,
+ "pair_id": "A_origin_32_A_table_68_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_32/A_table_68_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_32",
+ "table_id": "A_table_68_01"
+ },
+ {
+ "index": 61,
+ "pair_id": "A_origin_32_A_table_68_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_32/A_table_68_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_32",
+ "table_id": "A_table_68_02"
+ },
+ {
+ "index": 62,
+ "pair_id": "A_origin_32_A_table_69_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_32/A_table_69_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_32",
+ "table_id": "A_table_69_01"
+ },
+ {
+ "index": 63,
+ "pair_id": "A_origin_32_A_table_69_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_32/A_table_69_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_32",
+ "table_id": "A_table_69_02"
+ },
+ {
+ "index": 64,
+ "pair_id": "A_origin_33_A_table_70_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_33/A_table_70_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_33",
+ "table_id": "A_table_70_01"
+ },
+ {
+ "index": 65,
+ "pair_id": "A_origin_33_A_table_70_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_33/A_table_70_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_33",
+ "table_id": "A_table_70_02"
+ },
+ {
+ "index": 66,
+ "pair_id": "A_origin_34_A_table_71_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_34/A_table_71_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_34",
+ "table_id": "A_table_71_01"
+ },
+ {
+ "index": 67,
+ "pair_id": "A_origin_34_A_table_71_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_34/A_table_71_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_34",
+ "table_id": "A_table_71_02"
+ },
+ {
+ "index": 68,
+ "pair_id": "A_origin_35_A_table_72",
+ "image_paths": [
+ "data/Academic/Table/A_origin_35/A_table_72.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_35",
+ "table_id": "A_table_72"
+ },
+ {
+ "index": 69,
+ "pair_id": "A_origin_36_A_table_73",
+ "image_paths": [
+ "data/Academic/Table/A_origin_36/A_table_73.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_36",
+ "table_id": "A_table_73"
+ },
+ {
+ "index": 70,
+ "pair_id": "A_origin_36_A_table_74",
+ "image_paths": [
+ "data/Academic/Table/A_origin_36/A_table_74.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_36",
+ "table_id": "A_table_74"
+ },
+ {
+ "index": 71,
+ "pair_id": "A_origin_36_A_table_75",
+ "image_paths": [
+ "data/Academic/Table/A_origin_36/A_table_75.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_36",
+ "table_id": "A_table_75"
+ },
+ {
+ "index": 72,
+ "pair_id": "A_origin_37_A_table_76_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_37/A_table_76_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_37",
+ "table_id": "A_table_76_01"
+ },
+ {
+ "index": 73,
+ "pair_id": "A_origin_37_A_table_76_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_37/A_table_76_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_37",
+ "table_id": "A_table_76_02"
+ },
+ {
+ "index": 74,
+ "pair_id": "A_origin_38_A_table_77",
+ "image_paths": [
+ "data/Academic/Table/A_origin_38/A_table_77.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_38",
+ "table_id": "A_table_77"
+ },
+ {
+ "index": 75,
+ "pair_id": "A_origin_38_A_table_78",
+ "image_paths": [
+ "data/Academic/Table/A_origin_38/A_table_78.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_38",
+ "table_id": "A_table_78"
+ },
+ {
+ "index": 76,
+ "pair_id": "A_origin_39_A_table_79",
+ "image_paths": [
+ "data/Academic/Table/A_origin_39/A_table_79.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_39",
+ "table_id": "A_table_79"
+ },
+ {
+ "index": 77,
+ "pair_id": "A_origin_39_A_table_80_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_39/A_table_80_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_39",
+ "table_id": "A_table_80_01"
+ },
+ {
+ "index": 78,
+ "pair_id": "A_origin_39_A_table_80_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_39/A_table_80_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_39",
+ "table_id": "A_table_80_02"
+ },
+ {
+ "index": 79,
+ "pair_id": "A_origin_4_A_table_12",
+ "image_paths": [
+ "data/Academic/Table/A_origin_4/A_table_12.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_4",
+ "table_id": "A_table_12"
+ },
+ {
+ "index": 80,
+ "pair_id": "A_origin_4_A_table_13",
+ "image_paths": [
+ "data/Academic/Table/A_origin_4/A_table_13.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_4",
+ "table_id": "A_table_13"
+ },
+ {
+ "index": 81,
+ "pair_id": "A_origin_40_A_table_81",
+ "image_paths": [
+ "data/Academic/Table/A_origin_40/A_table_81.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_40",
+ "table_id": "A_table_81"
+ },
+ {
+ "index": 82,
+ "pair_id": "A_origin_40_A_table_82",
+ "image_paths": [
+ "data/Academic/Table/A_origin_40/A_table_82.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_40",
+ "table_id": "A_table_82"
+ },
+ {
+ "index": 83,
+ "pair_id": "A_origin_40_A_table_83",
+ "image_paths": [
+ "data/Academic/Table/A_origin_40/A_table_83.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_40",
+ "table_id": "A_table_83"
+ },
+ {
+ "index": 84,
+ "pair_id": "A_origin_41_A_table_84",
+ "image_paths": [
+ "data/Academic/Table/A_origin_41/A_table_84.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_41",
+ "table_id": "A_table_84"
+ },
+ {
+ "index": 85,
+ "pair_id": "A_origin_41_A_table_85",
+ "image_paths": [
+ "data/Academic/Table/A_origin_41/A_table_85.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_41",
+ "table_id": "A_table_85"
+ },
+ {
+ "index": 86,
+ "pair_id": "A_origin_42_A_table_86",
+ "image_paths": [
+ "data/Academic/Table/A_origin_42/A_table_86.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_42",
+ "table_id": "A_table_86"
+ },
+ {
+ "index": 87,
+ "pair_id": "A_origin_42_A_table_87",
+ "image_paths": [
+ "data/Academic/Table/A_origin_42/A_table_87.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_42",
+ "table_id": "A_table_87"
+ },
+ {
+ "index": 88,
+ "pair_id": "A_origin_43_A_table_88",
+ "image_paths": [
+ "data/Academic/Table/A_origin_43/A_table_88.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_43",
+ "table_id": "A_table_88"
+ },
+ {
+ "index": 89,
+ "pair_id": "A_origin_43_A_table_89",
+ "image_paths": [
+ "data/Academic/Table/A_origin_43/A_table_89.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_43",
+ "table_id": "A_table_89"
+ },
+ {
+ "index": 90,
+ "pair_id": "A_origin_43_A_table_90",
+ "image_paths": [
+ "data/Academic/Table/A_origin_43/A_table_90.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_43",
+ "table_id": "A_table_90"
+ },
+ {
+ "index": 91,
+ "pair_id": "A_origin_44_A_table_91",
+ "image_paths": [
+ "data/Academic/Table/A_origin_44/A_table_91.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_44",
+ "table_id": "A_table_91"
+ },
+ {
+ "index": 92,
+ "pair_id": "A_origin_45_A_table_92",
+ "image_paths": [
+ "data/Academic/Table/A_origin_45/A_table_92.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_45",
+ "table_id": "A_table_92"
+ },
+ {
+ "index": 93,
+ "pair_id": "A_origin_45_A_table_93",
+ "image_paths": [
+ "data/Academic/Table/A_origin_45/A_table_93.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_45",
+ "table_id": "A_table_93"
+ },
+ {
+ "index": 94,
+ "pair_id": "A_origin_45_A_table_94",
+ "image_paths": [
+ "data/Academic/Table/A_origin_45/A_table_94.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_45",
+ "table_id": "A_table_94"
+ },
+ {
+ "index": 95,
+ "pair_id": "A_origin_45_A_table_95",
+ "image_paths": [
+ "data/Academic/Table/A_origin_45/A_table_95.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_45",
+ "table_id": "A_table_95"
+ },
+ {
+ "index": 96,
+ "pair_id": "A_origin_46_A_table_96",
+ "image_paths": [
+ "data/Academic/Table/A_origin_46/A_table_96.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_46",
+ "table_id": "A_table_96"
+ },
+ {
+ "index": 97,
+ "pair_id": "A_origin_47_A_table_97",
+ "image_paths": [
+ "data/Academic/Table/A_origin_47/A_table_97.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_47",
+ "table_id": "A_table_97"
+ },
+ {
+ "index": 98,
+ "pair_id": "A_origin_47_A_table_98",
+ "image_paths": [
+ "data/Academic/Table/A_origin_47/A_table_98.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_47",
+ "table_id": "A_table_98"
+ },
+ {
+ "index": 99,
+ "pair_id": "A_origin_48_A_table_99",
+ "image_paths": [
+ "data/Academic/Table/A_origin_48/A_table_99.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_48",
+ "table_id": "A_table_99"
+ },
+ {
+ "index": 100,
+ "pair_id": "A_origin_49_A_table_100",
+ "image_paths": [
+ "data/Academic/Table/A_origin_49/A_table_100.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_49",
+ "table_id": "A_table_100"
+ },
+ {
+ "index": 101,
+ "pair_id": "A_origin_49_A_table_101",
+ "image_paths": [
+ "data/Academic/Table/A_origin_49/A_table_101.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_49",
+ "table_id": "A_table_101"
+ },
+ {
+ "index": 102,
+ "pair_id": "A_origin_5_A_table_14",
+ "image_paths": [
+ "data/Academic/Table/A_origin_5/A_table_14.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_5",
+ "table_id": "A_table_14"
+ },
+ {
+ "index": 103,
+ "pair_id": "A_origin_5_A_table_15",
+ "image_paths": [
+ "data/Academic/Table/A_origin_5/A_table_15.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_5",
+ "table_id": "A_table_15"
+ },
+ {
+ "index": 104,
+ "pair_id": "A_origin_5_A_table_16",
+ "image_paths": [
+ "data/Academic/Table/A_origin_5/A_table_16.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_5",
+ "table_id": "A_table_16"
+ },
+ {
+ "index": 105,
+ "pair_id": "A_origin_50_A_table_102",
+ "image_paths": [
+ "data/Academic/Table/A_origin_50/A_table_102.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_50",
+ "table_id": "A_table_102"
+ },
+ {
+ "index": 106,
+ "pair_id": "A_origin_51_A_table_103",
+ "image_paths": [
+ "data/Academic/Table/A_origin_51/A_table_103.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_51",
+ "table_id": "A_table_103"
+ },
+ {
+ "index": 107,
+ "pair_id": "A_origin_51_A_table_104",
+ "image_paths": [
+ "data/Academic/Table/A_origin_51/A_table_104.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_51",
+ "table_id": "A_table_104"
+ },
+ {
+ "index": 108,
+ "pair_id": "A_origin_52_A_table_105",
+ "image_paths": [
+ "data/Academic/Table/A_origin_52/A_table_105.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_52",
+ "table_id": "A_table_105"
+ },
+ {
+ "index": 109,
+ "pair_id": "A_origin_53_A_table_106_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_53/A_table_106_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_53",
+ "table_id": "A_table_106_01"
+ },
+ {
+ "index": 110,
+ "pair_id": "A_origin_53_A_table_106_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_53/A_table_106_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_53",
+ "table_id": "A_table_106_02"
+ },
+ {
+ "index": 111,
+ "pair_id": "A_origin_53_A_table_107_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_53/A_table_107_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_53",
+ "table_id": "A_table_107_01"
+ },
+ {
+ "index": 112,
+ "pair_id": "A_origin_53_A_table_107_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_53/A_table_107_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_53",
+ "table_id": "A_table_107_02"
+ },
+ {
+ "index": 113,
+ "pair_id": "A_origin_53_A_table_108",
+ "image_paths": [
+ "data/Academic/Table/A_origin_53/A_table_108.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_53",
+ "table_id": "A_table_108"
+ },
+ {
+ "index": 114,
+ "pair_id": "A_origin_54_A_table_109",
+ "image_paths": [
+ "data/Academic/Table/A_origin_54/A_table_109.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_54",
+ "table_id": "A_table_109"
+ },
+ {
+ "index": 115,
+ "pair_id": "A_origin_54_A_table_110",
+ "image_paths": [
+ "data/Academic/Table/A_origin_54/A_table_110.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_54",
+ "table_id": "A_table_110"
+ },
+ {
+ "index": 116,
+ "pair_id": "A_origin_55_A_table_111",
+ "image_paths": [
+ "data/Academic/Table/A_origin_55/A_table_111.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_55",
+ "table_id": "A_table_111"
+ },
+ {
+ "index": 117,
+ "pair_id": "A_origin_55_A_table_112",
+ "image_paths": [
+ "data/Academic/Table/A_origin_55/A_table_112.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_55",
+ "table_id": "A_table_112"
+ },
+ {
+ "index": 118,
+ "pair_id": "A_origin_56_A_table_113",
+ "image_paths": [
+ "data/Academic/Table/A_origin_56/A_table_113.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_56",
+ "table_id": "A_table_113"
+ },
+ {
+ "index": 119,
+ "pair_id": "A_origin_56_A_table_114",
+ "image_paths": [
+ "data/Academic/Table/A_origin_56/A_table_114.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_56",
+ "table_id": "A_table_114"
+ },
+ {
+ "index": 120,
+ "pair_id": "A_origin_56_A_table_115",
+ "image_paths": [
+ "data/Academic/Table/A_origin_56/A_table_115.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_56",
+ "table_id": "A_table_115"
+ },
+ {
+ "index": 121,
+ "pair_id": "A_origin_57_A_table_116",
+ "image_paths": [
+ "data/Academic/Table/A_origin_57/A_table_116.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_57",
+ "table_id": "A_table_116"
+ },
+ {
+ "index": 122,
+ "pair_id": "A_origin_57_A_table_117",
+ "image_paths": [
+ "data/Academic/Table/A_origin_57/A_table_117.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_57",
+ "table_id": "A_table_117"
+ },
+ {
+ "index": 123,
+ "pair_id": "A_origin_57_A_table_118",
+ "image_paths": [
+ "data/Academic/Table/A_origin_57/A_table_118.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_57",
+ "table_id": "A_table_118"
+ },
+ {
+ "index": 124,
+ "pair_id": "A_origin_57_A_table_119",
+ "image_paths": [
+ "data/Academic/Table/A_origin_57/A_table_119.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_57",
+ "table_id": "A_table_119"
+ },
+ {
+ "index": 125,
+ "pair_id": "A_origin_57_A_table_120",
+ "image_paths": [
+ "data/Academic/Table/A_origin_57/A_table_120.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_57",
+ "table_id": "A_table_120"
+ },
+ {
+ "index": 126,
+ "pair_id": "A_origin_58_A_table_121",
+ "image_paths": [
+ "data/Academic/Table/A_origin_58/A_table_121.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_58",
+ "table_id": "A_table_121"
+ },
+ {
+ "index": 127,
+ "pair_id": "A_origin_58_A_table_122",
+ "image_paths": [
+ "data/Academic/Table/A_origin_58/A_table_122.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_58",
+ "table_id": "A_table_122"
+ },
+ {
+ "index": 128,
+ "pair_id": "A_origin_58_A_table_123_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_58/A_table_123_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_58",
+ "table_id": "A_table_123_01"
+ },
+ {
+ "index": 129,
+ "pair_id": "A_origin_58_A_table_123_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_58/A_table_123_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_58",
+ "table_id": "A_table_123_02"
+ },
+ {
+ "index": 130,
+ "pair_id": "A_origin_59_A_table_124_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_59/A_table_124_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_59",
+ "table_id": "A_table_124_01"
+ },
+ {
+ "index": 131,
+ "pair_id": "A_origin_59_A_table_124_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_59/A_table_124_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_59",
+ "table_id": "A_table_124_02"
+ },
+ {
+ "index": 132,
+ "pair_id": "A_origin_6_A_table_18",
+ "image_paths": [
+ "data/Academic/Table/A_origin_6/A_table_18.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_6",
+ "table_id": "A_table_18"
+ },
+ {
+ "index": 133,
+ "pair_id": "A_origin_6_A_table_19",
+ "image_paths": [
+ "data/Academic/Table/A_origin_6/A_table_19.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_6",
+ "table_id": "A_table_19"
+ },
+ {
+ "index": 134,
+ "pair_id": "A_origin_6_A_table_8",
+ "image_paths": [
+ "data/Academic/Table/A_origin_6/A_table_8.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_6",
+ "table_id": "A_table_8"
+ },
+ {
+ "index": 135,
+ "pair_id": "A_origin_60_A_table_125_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_60/A_table_125_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_60",
+ "table_id": "A_table_125_01"
+ },
+ {
+ "index": 136,
+ "pair_id": "A_origin_60_A_table_125_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_60/A_table_125_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_60",
+ "table_id": "A_table_125_02"
+ },
+ {
+ "index": 137,
+ "pair_id": "A_origin_61_A_table_126",
+ "image_paths": [
+ "data/Academic/Table/A_origin_61/A_table_126.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_61",
+ "table_id": "A_table_126"
+ },
+ {
+ "index": 138,
+ "pair_id": "A_origin_62_A_table_127_01",
+ "image_paths": [
+ "data/Academic/Table/A_origin_62/A_table_127_01.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_62",
+ "table_id": "A_table_127_01"
+ },
+ {
+ "index": 139,
+ "pair_id": "A_origin_62_A_table_127_02",
+ "image_paths": [
+ "data/Academic/Table/A_origin_62/A_table_127_02.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_62",
+ "table_id": "A_table_127_02"
+ },
+ {
+ "index": 140,
+ "pair_id": "A_origin_63_A_table_128",
+ "image_paths": [
+ "data/Academic/Table/A_origin_63/A_table_128.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_63",
+ "table_id": "A_table_128"
+ },
+ {
+ "index": 141,
+ "pair_id": "A_origin_63_A_table_129",
+ "image_paths": [
+ "data/Academic/Table/A_origin_63/A_table_129.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_63",
+ "table_id": "A_table_129"
+ },
+ {
+ "index": 142,
+ "pair_id": "A_origin_64_A_table_130",
+ "image_paths": [
+ "data/Academic/Table/A_origin_64/A_table_130.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_64",
+ "table_id": "A_table_130"
+ },
+ {
+ "index": 143,
+ "pair_id": "A_origin_64_A_table_131",
+ "image_paths": [
+ "data/Academic/Table/A_origin_64/A_table_131.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_64",
+ "table_id": "A_table_131"
+ },
+ {
+ "index": 144,
+ "pair_id": "A_origin_64_A_table_132",
+ "image_paths": [
+ "data/Academic/Table/A_origin_64/A_table_132.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_64",
+ "table_id": "A_table_132"
+ },
+ {
+ "index": 145,
+ "pair_id": "A_origin_65_A_table_133",
+ "image_paths": [
+ "data/Academic/Table/A_origin_65/A_table_133.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_65",
+ "table_id": "A_table_133"
+ },
+ {
+ "index": 146,
+ "pair_id": "A_origin_65_A_table_134",
+ "image_paths": [
+ "data/Academic/Table/A_origin_65/A_table_134.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_65",
+ "table_id": "A_table_134"
+ },
+ {
+ "index": 147,
+ "pair_id": "A_origin_66_A_table_135",
+ "image_paths": [
+ "data/Academic/Table/A_origin_66/A_table_135.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_66",
+ "table_id": "A_table_135"
+ },
+ {
+ "index": 148,
+ "pair_id": "A_origin_66_A_table_136",
+ "image_paths": [
+ "data/Academic/Table/A_origin_66/A_table_136.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_66",
+ "table_id": "A_table_136"
+ },
+ {
+ "index": 149,
+ "pair_id": "A_origin_66_A_table_137",
+ "image_paths": [
+ "data/Academic/Table/A_origin_66/A_table_137.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_66",
+ "table_id": "A_table_137"
+ },
+ {
+ "index": 150,
+ "pair_id": "A_origin_66_A_table_138",
+ "image_paths": [
+ "data/Academic/Table/A_origin_66/A_table_138.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_66",
+ "table_id": "A_table_138"
+ },
+ {
+ "index": 151,
+ "pair_id": "A_origin_67_A_table_139",
+ "image_paths": [
+ "data/Academic/Table/A_origin_67/A_table_139.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_67",
+ "table_id": "A_table_139"
+ },
+ {
+ "index": 152,
+ "pair_id": "A_origin_7_A_table_20",
+ "image_paths": [
+ "data/Academic/Table/A_origin_7/A_table_20.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_7",
+ "table_id": "A_table_20"
+ },
+ {
+ "index": 153,
+ "pair_id": "A_origin_7_A_table_21",
+ "image_paths": [
+ "data/Academic/Table/A_origin_7/A_table_21.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_7",
+ "table_id": "A_table_21"
+ },
+ {
+ "index": 154,
+ "pair_id": "A_origin_8_A_table_22",
+ "image_paths": [
+ "data/Academic/Table/A_origin_8/A_table_22.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_8",
+ "table_id": "A_table_22"
+ },
+ {
+ "index": 155,
+ "pair_id": "A_origin_8_A_table_23",
+ "image_paths": [
+ "data/Academic/Table/A_origin_8/A_table_23.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_8",
+ "table_id": "A_table_23"
+ },
+ {
+ "index": 156,
+ "pair_id": "A_origin_8_A_table_24",
+ "image_paths": [
+ "data/Academic/Table/A_origin_8/A_table_24.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_8",
+ "table_id": "A_table_24"
+ },
+ {
+ "index": 157,
+ "pair_id": "A_origin_9_A_table_25",
+ "image_paths": [
+ "data/Academic/Table/A_origin_9/A_table_25.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_9",
+ "table_id": "A_table_25"
+ },
+ {
+ "index": 158,
+ "pair_id": "A_origin_9_A_table_26",
+ "image_paths": [
+ "data/Academic/Table/A_origin_9/A_table_26.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_9",
+ "table_id": "A_table_26"
+ },
+ {
+ "index": 159,
+ "pair_id": "A_origin_9_A_table_27",
+ "image_paths": [
+ "data/Academic/Table/A_origin_9/A_table_27.png"
+ ],
+ "domain": "Academic",
+ "origin": "A_origin_9",
+ "table_id": "A_table_27"
+ }
+]
diff --git a/single_image_json_list/single_table_business_input.json b/single_image_json_list/single_table_business_input.json
new file mode 100644
index 0000000..7309657
--- /dev/null
+++ b/single_image_json_list/single_table_business_input.json
@@ -0,0 +1,1492 @@
+[
+ {
+ "index": 0,
+ "pair_id": "B_origin_0_B_table_0_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_0/B_table_0_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_0",
+ "table_id": "B_table_0_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "B_origin_0_B_table_1_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_0/B_table_1_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_0",
+ "table_id": "B_table_1_0"
+ },
+ {
+ "index": 2,
+ "pair_id": "B_origin_0_B_table_2_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_0/B_table_2_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_0",
+ "table_id": "B_table_2_0"
+ },
+ {
+ "index": 3,
+ "pair_id": "B_origin_1_B_table_3_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_1/B_table_3_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_1",
+ "table_id": "B_table_3_0"
+ },
+ {
+ "index": 4,
+ "pair_id": "B_origin_1_B_table_4_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_1/B_table_4_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_1",
+ "table_id": "B_table_4_0"
+ },
+ {
+ "index": 5,
+ "pair_id": "B_origin_10_B_table_31_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_10/B_table_31_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_10",
+ "table_id": "B_table_31_0"
+ },
+ {
+ "index": 6,
+ "pair_id": "B_origin_10_B_table_32_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_10/B_table_32_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_10",
+ "table_id": "B_table_32_0"
+ },
+ {
+ "index": 7,
+ "pair_id": "B_origin_10_B_table_33_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_10/B_table_33_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_10",
+ "table_id": "B_table_33_0"
+ },
+ {
+ "index": 8,
+ "pair_id": "B_origin_10_B_table_34_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_10/B_table_34_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_10",
+ "table_id": "B_table_34_0"
+ },
+ {
+ "index": 9,
+ "pair_id": "B_origin_11_B_table_35_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_11/B_table_35_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_11",
+ "table_id": "B_table_35_0"
+ },
+ {
+ "index": 10,
+ "pair_id": "B_origin_11_B_table_36_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_11/B_table_36_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_11",
+ "table_id": "B_table_36_0"
+ },
+ {
+ "index": 11,
+ "pair_id": "B_origin_11_B_table_37_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_11/B_table_37_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_11",
+ "table_id": "B_table_37_0"
+ },
+ {
+ "index": 12,
+ "pair_id": "B_origin_11_B_table_38_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_11/B_table_38_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_11",
+ "table_id": "B_table_38_0"
+ },
+ {
+ "index": 13,
+ "pair_id": "B_origin_12_B_table_39_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_12/B_table_39_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_12",
+ "table_id": "B_table_39_0"
+ },
+ {
+ "index": 14,
+ "pair_id": "B_origin_12_B_table_40_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_12/B_table_40_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_12",
+ "table_id": "B_table_40_0"
+ },
+ {
+ "index": 15,
+ "pair_id": "B_origin_13_B_table_41_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_13/B_table_41_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_13",
+ "table_id": "B_table_41_0"
+ },
+ {
+ "index": 16,
+ "pair_id": "B_origin_13_B_table_42_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_13/B_table_42_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_13",
+ "table_id": "B_table_42_0"
+ },
+ {
+ "index": 17,
+ "pair_id": "B_origin_13_B_table_43_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_13/B_table_43_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_13",
+ "table_id": "B_table_43_0"
+ },
+ {
+ "index": 18,
+ "pair_id": "B_origin_13_B_table_44_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_13/B_table_44_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_13",
+ "table_id": "B_table_44_0"
+ },
+ {
+ "index": 19,
+ "pair_id": "B_origin_14_B_table_45_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_45_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_45_0"
+ },
+ {
+ "index": 20,
+ "pair_id": "B_origin_14_B_table_45_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_45_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_45_1"
+ },
+ {
+ "index": 21,
+ "pair_id": "B_origin_14_B_table_45_2",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_45_2.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_45_2"
+ },
+ {
+ "index": 22,
+ "pair_id": "B_origin_14_B_table_46_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_46_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_46_0"
+ },
+ {
+ "index": 23,
+ "pair_id": "B_origin_14_B_table_47_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_47_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_47_0"
+ },
+ {
+ "index": 24,
+ "pair_id": "B_origin_14_B_table_48_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_14/B_table_48_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_14",
+ "table_id": "B_table_48_0"
+ },
+ {
+ "index": 25,
+ "pair_id": "B_origin_15_B_table_49_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_49_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_49_0"
+ },
+ {
+ "index": 26,
+ "pair_id": "B_origin_15_B_table_50_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_50_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_50_0"
+ },
+ {
+ "index": 27,
+ "pair_id": "B_origin_15_B_table_51_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_51_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_51_0"
+ },
+ {
+ "index": 28,
+ "pair_id": "B_origin_15_B_table_51_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_51_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_51_1"
+ },
+ {
+ "index": 29,
+ "pair_id": "B_origin_15_B_table_52_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_52_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_52_0"
+ },
+ {
+ "index": 30,
+ "pair_id": "B_origin_15_B_table_53_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_15/B_table_53_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_15",
+ "table_id": "B_table_53_0"
+ },
+ {
+ "index": 31,
+ "pair_id": "B_origin_16_B_table_54_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_16/B_table_54_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_16",
+ "table_id": "B_table_54_0"
+ },
+ {
+ "index": 32,
+ "pair_id": "B_origin_17_B_table_55_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_55_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_55_0"
+ },
+ {
+ "index": 33,
+ "pair_id": "B_origin_17_B_table_56_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_56_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_56_0"
+ },
+ {
+ "index": 34,
+ "pair_id": "B_origin_17_B_table_57_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_57_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_57_0"
+ },
+ {
+ "index": 35,
+ "pair_id": "B_origin_17_B_table_58_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_58_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_58_0"
+ },
+ {
+ "index": 36,
+ "pair_id": "B_origin_17_B_table_59_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_59_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_59_0"
+ },
+ {
+ "index": 37,
+ "pair_id": "B_origin_17_B_table_60_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_17/B_table_60_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_17",
+ "table_id": "B_table_60_0"
+ },
+ {
+ "index": 38,
+ "pair_id": "B_origin_18_B_table_61_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_61_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_61_0"
+ },
+ {
+ "index": 39,
+ "pair_id": "B_origin_18_B_table_62_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_62_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_62_0"
+ },
+ {
+ "index": 40,
+ "pair_id": "B_origin_18_B_table_63_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_63_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_63_0"
+ },
+ {
+ "index": 41,
+ "pair_id": "B_origin_18_B_table_63_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_63_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_63_1"
+ },
+ {
+ "index": 42,
+ "pair_id": "B_origin_18_B_table_64_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_64_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_64_0"
+ },
+ {
+ "index": 43,
+ "pair_id": "B_origin_18_B_table_65_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_18/B_table_65_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_18",
+ "table_id": "B_table_65_0"
+ },
+ {
+ "index": 44,
+ "pair_id": "B_origin_19_B_table_66_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_19/B_table_66_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_19",
+ "table_id": "B_table_66_0"
+ },
+ {
+ "index": 45,
+ "pair_id": "B_origin_2_B_table_5_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_5_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_2",
+ "table_id": "B_table_5_0"
+ },
+ {
+ "index": 46,
+ "pair_id": "B_origin_2_B_table_6_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_6_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_2",
+ "table_id": "B_table_6_0"
+ },
+ {
+ "index": 47,
+ "pair_id": "B_origin_2_B_table_7_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_7_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_2",
+ "table_id": "B_table_7_0"
+ },
+ {
+ "index": 48,
+ "pair_id": "B_origin_2_B_table_8_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_8_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_2",
+ "table_id": "B_table_8_0"
+ },
+ {
+ "index": 49,
+ "pair_id": "B_origin_2_B_table_9_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_2/B_table_9_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_2",
+ "table_id": "B_table_9_0"
+ },
+ {
+ "index": 50,
+ "pair_id": "B_origin_20_B_table_67_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_20/B_table_67_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_20",
+ "table_id": "B_table_67_0"
+ },
+ {
+ "index": 51,
+ "pair_id": "B_origin_20_B_table_68_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_20/B_table_68_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_20",
+ "table_id": "B_table_68_0"
+ },
+ {
+ "index": 52,
+ "pair_id": "B_origin_20_B_table_69_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_20/B_table_69_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_20",
+ "table_id": "B_table_69_0"
+ },
+ {
+ "index": 53,
+ "pair_id": "B_origin_21_B_table_70_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_70_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_70_0"
+ },
+ {
+ "index": 54,
+ "pair_id": "B_origin_21_B_table_71_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_71_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_71_0"
+ },
+ {
+ "index": 55,
+ "pair_id": "B_origin_21_B_table_72_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_72_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_72_0"
+ },
+ {
+ "index": 56,
+ "pair_id": "B_origin_21_B_table_72_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_72_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_72_1"
+ },
+ {
+ "index": 57,
+ "pair_id": "B_origin_21_B_table_73_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_73_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_73_0"
+ },
+ {
+ "index": 58,
+ "pair_id": "B_origin_21_B_table_74_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_21/B_table_74_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_21",
+ "table_id": "B_table_74_0"
+ },
+ {
+ "index": 59,
+ "pair_id": "B_origin_22_B_table_75_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_22/B_table_75_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_22",
+ "table_id": "B_table_75_0"
+ },
+ {
+ "index": 60,
+ "pair_id": "B_origin_22_B_table_76_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_22/B_table_76_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_22",
+ "table_id": "B_table_76_0"
+ },
+ {
+ "index": 61,
+ "pair_id": "B_origin_22_B_table_77_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_22/B_table_77_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_22",
+ "table_id": "B_table_77_0"
+ },
+ {
+ "index": 62,
+ "pair_id": "B_origin_22_B_table_78_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_22/B_table_78_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_22",
+ "table_id": "B_table_78_0"
+ },
+ {
+ "index": 63,
+ "pair_id": "B_origin_22_B_table_79_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_22/B_table_79_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_22",
+ "table_id": "B_table_79_0"
+ },
+ {
+ "index": 64,
+ "pair_id": "B_origin_23_B_table_79_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_23/B_table_79_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_23",
+ "table_id": "B_table_79_0"
+ },
+ {
+ "index": 65,
+ "pair_id": "B_origin_23_B_table_80_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_23/B_table_80_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_23",
+ "table_id": "B_table_80_0"
+ },
+ {
+ "index": 66,
+ "pair_id": "B_origin_23_B_table_81_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_23/B_table_81_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_23",
+ "table_id": "B_table_81_0"
+ },
+ {
+ "index": 67,
+ "pair_id": "B_origin_24_B_table_82_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_24/B_table_82_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_24",
+ "table_id": "B_table_82_0"
+ },
+ {
+ "index": 68,
+ "pair_id": "B_origin_24_B_table_83_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_24/B_table_83_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_24",
+ "table_id": "B_table_83_0"
+ },
+ {
+ "index": 69,
+ "pair_id": "B_origin_24_B_table_84_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_24/B_table_84_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_24",
+ "table_id": "B_table_84_0"
+ },
+ {
+ "index": 70,
+ "pair_id": "B_origin_25_B_table_85_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_25/B_table_85_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_25",
+ "table_id": "B_table_85_0"
+ },
+ {
+ "index": 71,
+ "pair_id": "B_origin_25_B_table_86_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_25/B_table_86_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_25",
+ "table_id": "B_table_86_0"
+ },
+ {
+ "index": 72,
+ "pair_id": "B_origin_26_B_table_87_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_26/B_table_87_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_26",
+ "table_id": "B_table_87_0"
+ },
+ {
+ "index": 73,
+ "pair_id": "B_origin_26_B_table_88_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_26/B_table_88_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_26",
+ "table_id": "B_table_88_0"
+ },
+ {
+ "index": 74,
+ "pair_id": "B_origin_27_B_table_89_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_27/B_table_89_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_27",
+ "table_id": "B_table_89_0"
+ },
+ {
+ "index": 75,
+ "pair_id": "B_origin_27_B_table_90_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_27/B_table_90_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_27",
+ "table_id": "B_table_90_0"
+ },
+ {
+ "index": 76,
+ "pair_id": "B_origin_27_B_table_91_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_27/B_table_91_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_27",
+ "table_id": "B_table_91_0"
+ },
+ {
+ "index": 77,
+ "pair_id": "B_origin_28_B_table_92_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_92_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_92_0"
+ },
+ {
+ "index": 78,
+ "pair_id": "B_origin_28_B_table_93_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_93_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_93_0"
+ },
+ {
+ "index": 79,
+ "pair_id": "B_origin_28_B_table_94_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_94_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_94_0"
+ },
+ {
+ "index": 80,
+ "pair_id": "B_origin_28_B_table_95_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_95_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_95_0"
+ },
+ {
+ "index": 81,
+ "pair_id": "B_origin_28_B_table_96_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_96_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_96_0"
+ },
+ {
+ "index": 82,
+ "pair_id": "B_origin_28_B_table_97_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_28/B_table_97_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_28",
+ "table_id": "B_table_97_0"
+ },
+ {
+ "index": 83,
+ "pair_id": "B_origin_29_B_table_100_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_29/B_table_100_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_29",
+ "table_id": "B_table_100_0"
+ },
+ {
+ "index": 84,
+ "pair_id": "B_origin_29_B_table_98_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_29/B_table_98_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_29",
+ "table_id": "B_table_98_0"
+ },
+ {
+ "index": 85,
+ "pair_id": "B_origin_29_B_table_99_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_29/B_table_99_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_29",
+ "table_id": "B_table_99_0"
+ },
+ {
+ "index": 86,
+ "pair_id": "B_origin_3_B_table_10_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_3/B_table_10_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_3",
+ "table_id": "B_table_10_0"
+ },
+ {
+ "index": 87,
+ "pair_id": "B_origin_3_B_table_11_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_3/B_table_11_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_3",
+ "table_id": "B_table_11_0"
+ },
+ {
+ "index": 88,
+ "pair_id": "B_origin_3_B_table_12_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_3/B_table_12_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_3",
+ "table_id": "B_table_12_0"
+ },
+ {
+ "index": 89,
+ "pair_id": "B_origin_3_B_table_13_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_3/B_table_13_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_3",
+ "table_id": "B_table_13_0"
+ },
+ {
+ "index": 90,
+ "pair_id": "B_origin_30_B_table_101_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_30/B_table_101_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_30",
+ "table_id": "B_table_101_0"
+ },
+ {
+ "index": 91,
+ "pair_id": "B_origin_30_B_table_102_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_30/B_table_102_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_30",
+ "table_id": "B_table_102_0"
+ },
+ {
+ "index": 92,
+ "pair_id": "B_origin_30_B_table_103_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_30/B_table_103_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_30",
+ "table_id": "B_table_103_0"
+ },
+ {
+ "index": 93,
+ "pair_id": "B_origin_31_B_table_104_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_31/B_table_104_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_31",
+ "table_id": "B_table_104_0"
+ },
+ {
+ "index": 94,
+ "pair_id": "B_origin_31_B_table_105_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_31/B_table_105_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_31",
+ "table_id": "B_table_105_0"
+ },
+ {
+ "index": 95,
+ "pair_id": "B_origin_31_B_table_106_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_31/B_table_106_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_31",
+ "table_id": "B_table_106_0"
+ },
+ {
+ "index": 96,
+ "pair_id": "B_origin_31_B_table_107_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_31/B_table_107_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_31",
+ "table_id": "B_table_107_0"
+ },
+ {
+ "index": 97,
+ "pair_id": "B_origin_31_B_table_108_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_31/B_table_108_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_31",
+ "table_id": "B_table_108_0"
+ },
+ {
+ "index": 98,
+ "pair_id": "B_origin_32_B_table_109_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_32/B_table_109_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_32",
+ "table_id": "B_table_109_0"
+ },
+ {
+ "index": 99,
+ "pair_id": "B_origin_32_B_table_110_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_32/B_table_110_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_32",
+ "table_id": "B_table_110_0"
+ },
+ {
+ "index": 100,
+ "pair_id": "B_origin_32_B_table_111_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_32/B_table_111_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_32",
+ "table_id": "B_table_111_0"
+ },
+ {
+ "index": 101,
+ "pair_id": "B_origin_32_B_table_112_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_32/B_table_112_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_32",
+ "table_id": "B_table_112_0"
+ },
+ {
+ "index": 102,
+ "pair_id": "B_origin_33_B_table_113_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_33/B_table_113_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_33",
+ "table_id": "B_table_113_0"
+ },
+ {
+ "index": 103,
+ "pair_id": "B_origin_33_B_table_114_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_33/B_table_114_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_33",
+ "table_id": "B_table_114_0"
+ },
+ {
+ "index": 104,
+ "pair_id": "B_origin_33_B_table_115_9",
+ "image_paths": [
+ "data/Business/Table/B_origin_33/B_table_115_9.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_33",
+ "table_id": "B_table_115_9"
+ },
+ {
+ "index": 105,
+ "pair_id": "B_origin_33_B_table_116_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_33/B_table_116_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_33",
+ "table_id": "B_table_116_0"
+ },
+ {
+ "index": 106,
+ "pair_id": "B_origin_33_B_table_117_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_33/B_table_117_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_33",
+ "table_id": "B_table_117_0"
+ },
+ {
+ "index": 107,
+ "pair_id": "B_origin_34_B_table_118_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_34/B_table_118_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_34",
+ "table_id": "B_table_118_0"
+ },
+ {
+ "index": 108,
+ "pair_id": "B_origin_34_B_table_119_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_34/B_table_119_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_34",
+ "table_id": "B_table_119_0"
+ },
+ {
+ "index": 109,
+ "pair_id": "B_origin_34_B_table_120_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_34/B_table_120_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_34",
+ "table_id": "B_table_120_0"
+ },
+ {
+ "index": 110,
+ "pair_id": "B_origin_34_B_table_121_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_34/B_table_121_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_34",
+ "table_id": "B_table_121_0"
+ },
+ {
+ "index": 111,
+ "pair_id": "B_origin_35_B_table_122_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_35/B_table_122_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_35",
+ "table_id": "B_table_122_0"
+ },
+ {
+ "index": 112,
+ "pair_id": "B_origin_35_B_table_123_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_35/B_table_123_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_35",
+ "table_id": "B_table_123_0"
+ },
+ {
+ "index": 113,
+ "pair_id": "B_origin_35_B_table_124_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_35/B_table_124_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_35",
+ "table_id": "B_table_124_0"
+ },
+ {
+ "index": 114,
+ "pair_id": "B_origin_35_B_table_125_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_35/B_table_125_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_35",
+ "table_id": "B_table_125_0"
+ },
+ {
+ "index": 115,
+ "pair_id": "B_origin_35_B_table_126_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_35/B_table_126_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_35",
+ "table_id": "B_table_126_0"
+ },
+ {
+ "index": 116,
+ "pair_id": "B_origin_36_B_table_127_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_36/B_table_127_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_36",
+ "table_id": "B_table_127_0"
+ },
+ {
+ "index": 117,
+ "pair_id": "B_origin_36_B_table_128_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_36/B_table_128_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_36",
+ "table_id": "B_table_128_0"
+ },
+ {
+ "index": 118,
+ "pair_id": "B_origin_36_B_table_129_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_36/B_table_129_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_36",
+ "table_id": "B_table_129_0"
+ },
+ {
+ "index": 119,
+ "pair_id": "B_origin_36_B_table_130_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_36/B_table_130_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_36",
+ "table_id": "B_table_130_0"
+ },
+ {
+ "index": 120,
+ "pair_id": "B_origin_37_B_table_131_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_131_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_37",
+ "table_id": "B_table_131_0"
+ },
+ {
+ "index": 121,
+ "pair_id": "B_origin_37_B_table_132_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_132_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_37",
+ "table_id": "B_table_132_0"
+ },
+ {
+ "index": 122,
+ "pair_id": "B_origin_37_B_table_132_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_132_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_37",
+ "table_id": "B_table_132_1"
+ },
+ {
+ "index": 123,
+ "pair_id": "B_origin_37_B_table_133_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_133_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_37",
+ "table_id": "B_table_133_0"
+ },
+ {
+ "index": 124,
+ "pair_id": "B_origin_37_B_table_134_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_37/B_table_134_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_37",
+ "table_id": "B_table_134_0"
+ },
+ {
+ "index": 125,
+ "pair_id": "B_origin_38_B_table_135_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_38/B_table_135_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_38",
+ "table_id": "B_table_135_0"
+ },
+ {
+ "index": 126,
+ "pair_id": "B_origin_38_B_table_136_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_38/B_table_136_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_38",
+ "table_id": "B_table_136_0"
+ },
+ {
+ "index": 127,
+ "pair_id": "B_origin_38_B_table_137_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_38/B_table_137_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_38",
+ "table_id": "B_table_137_0"
+ },
+ {
+ "index": 128,
+ "pair_id": "B_origin_38_B_table_138_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_38/B_table_138_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_38",
+ "table_id": "B_table_138_0"
+ },
+ {
+ "index": 129,
+ "pair_id": "B_origin_39_B_table_139_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_39/B_table_139_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_39",
+ "table_id": "B_table_139_0"
+ },
+ {
+ "index": 130,
+ "pair_id": "B_origin_39_B_table_140_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_39/B_table_140_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_39",
+ "table_id": "B_table_140_0"
+ },
+ {
+ "index": 131,
+ "pair_id": "B_origin_4_B_table_14_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_14_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_4",
+ "table_id": "B_table_14_0"
+ },
+ {
+ "index": 132,
+ "pair_id": "B_origin_4_B_table_15_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_15_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_4",
+ "table_id": "B_table_15_0"
+ },
+ {
+ "index": 133,
+ "pair_id": "B_origin_4_B_table_16_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_16_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_4",
+ "table_id": "B_table_16_0"
+ },
+ {
+ "index": 134,
+ "pair_id": "B_origin_4_B_table_16_1",
+ "image_paths": [
+ "data/Business/Table/B_origin_4/B_table_16_1.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_4",
+ "table_id": "B_table_16_1"
+ },
+ {
+ "index": 135,
+ "pair_id": "B_origin_5_B_table_17_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_5/B_table_17_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_5",
+ "table_id": "B_table_17_0"
+ },
+ {
+ "index": 136,
+ "pair_id": "B_origin_5_B_table_18_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_5/B_table_18_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_5",
+ "table_id": "B_table_18_0"
+ },
+ {
+ "index": 137,
+ "pair_id": "B_origin_6_B_table_19_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_6/B_table_19_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_6",
+ "table_id": "B_table_19_0"
+ },
+ {
+ "index": 138,
+ "pair_id": "B_origin_6_B_table_20_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_6/B_table_20_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_6",
+ "table_id": "B_table_20_0"
+ },
+ {
+ "index": 139,
+ "pair_id": "B_origin_6_B_table_21_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_6/B_table_21_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_6",
+ "table_id": "B_table_21_0"
+ },
+ {
+ "index": 140,
+ "pair_id": "B_origin_6_B_table_22_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_6/B_table_22_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_6",
+ "table_id": "B_table_22_0"
+ },
+ {
+ "index": 141,
+ "pair_id": "B_origin_7_B_table_23_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_7/B_table_23_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_7",
+ "table_id": "B_table_23_0"
+ },
+ {
+ "index": 142,
+ "pair_id": "B_origin_7_B_table_24_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_7/B_table_24_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_7",
+ "table_id": "B_table_24_0"
+ },
+ {
+ "index": 143,
+ "pair_id": "B_origin_7_B_table_25_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_7/B_table_25_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_7",
+ "table_id": "B_table_25_0"
+ },
+ {
+ "index": 144,
+ "pair_id": "B_origin_8_B_table_26_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_8/B_table_26_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_8",
+ "table_id": "B_table_26_0"
+ },
+ {
+ "index": 145,
+ "pair_id": "B_origin_8_B_table_27_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_8/B_table_27_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_8",
+ "table_id": "B_table_27_0"
+ },
+ {
+ "index": 146,
+ "pair_id": "B_origin_9_B_table_28_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_9/B_table_28_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_9",
+ "table_id": "B_table_28_0"
+ },
+ {
+ "index": 147,
+ "pair_id": "B_origin_9_B_table_29_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_9/B_table_29_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_9",
+ "table_id": "B_table_29_0"
+ },
+ {
+ "index": 148,
+ "pair_id": "B_origin_9_B_table_30_0",
+ "image_paths": [
+ "data/Business/Table/B_origin_9/B_table_30_0.png"
+ ],
+ "domain": "Business",
+ "origin": "B_origin_9",
+ "table_id": "B_table_30_0"
+ }
+]
\ No newline at end of file
diff --git a/single_image_json_list/single_table_finance_input.json b/single_image_json_list/single_table_finance_input.json
new file mode 100644
index 0000000..c40cdac
--- /dev/null
+++ b/single_image_json_list/single_table_finance_input.json
@@ -0,0 +1,3152 @@
+[
+ {
+ "index": 0,
+ "pair_id": "F_origin_0_F_table_0_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_0_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "F_origin_0_F_table_0_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_0_1"
+ },
+ {
+ "index": 2,
+ "pair_id": "F_origin_0_F_table_0_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_0_2"
+ },
+ {
+ "index": 3,
+ "pair_id": "F_origin_0_F_table_0_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_0_3"
+ },
+ {
+ "index": 4,
+ "pair_id": "F_origin_0_F_table_0_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_0_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_0_4"
+ },
+ {
+ "index": 5,
+ "pair_id": "F_origin_0_F_table_1_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_1_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_1_0"
+ },
+ {
+ "index": 6,
+ "pair_id": "F_origin_0_F_table_1_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_0/F_table_1_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_0",
+ "table_id": "F_table_1_1"
+ },
+ {
+ "index": 7,
+ "pair_id": "F_origin_1_F_table_2_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_1",
+ "table_id": "F_table_2_0"
+ },
+ {
+ "index": 8,
+ "pair_id": "F_origin_1_F_table_2_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_1",
+ "table_id": "F_table_2_1"
+ },
+ {
+ "index": 9,
+ "pair_id": "F_origin_1_F_table_2_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_1",
+ "table_id": "F_table_2_2"
+ },
+ {
+ "index": 10,
+ "pair_id": "F_origin_1_F_table_2_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_1",
+ "table_id": "F_table_2_3"
+ },
+ {
+ "index": 11,
+ "pair_id": "F_origin_1_F_table_2_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_1/F_table_2_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_1",
+ "table_id": "F_table_2_4"
+ },
+ {
+ "index": 12,
+ "pair_id": "F_origin_10_F_table_14_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_10",
+ "table_id": "F_table_14_0"
+ },
+ {
+ "index": 13,
+ "pair_id": "F_origin_10_F_table_14_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_10",
+ "table_id": "F_table_14_1"
+ },
+ {
+ "index": 14,
+ "pair_id": "F_origin_10_F_table_14_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_10",
+ "table_id": "F_table_14_2"
+ },
+ {
+ "index": 15,
+ "pair_id": "F_origin_10_F_table_14_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_10",
+ "table_id": "F_table_14_3"
+ },
+ {
+ "index": 16,
+ "pair_id": "F_origin_10_F_table_14_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_10/F_table_14_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_10",
+ "table_id": "F_table_14_4"
+ },
+ {
+ "index": 17,
+ "pair_id": "F_origin_11_F_table_15_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_0"
+ },
+ {
+ "index": 18,
+ "pair_id": "F_origin_11_F_table_15_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_1"
+ },
+ {
+ "index": 19,
+ "pair_id": "F_origin_11_F_table_15_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_2"
+ },
+ {
+ "index": 20,
+ "pair_id": "F_origin_11_F_table_15_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_3"
+ },
+ {
+ "index": 21,
+ "pair_id": "F_origin_11_F_table_15_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_4"
+ },
+ {
+ "index": 22,
+ "pair_id": "F_origin_11_F_table_15_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_11/F_table_15_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_11",
+ "table_id": "F_table_15_5"
+ },
+ {
+ "index": 23,
+ "pair_id": "F_origin_12_F_table_16_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_16_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_16_0"
+ },
+ {
+ "index": 24,
+ "pair_id": "F_origin_12_F_table_17_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_17_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_17_0"
+ },
+ {
+ "index": 25,
+ "pair_id": "F_origin_12_F_table_18_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_18_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_18_0"
+ },
+ {
+ "index": 26,
+ "pair_id": "F_origin_12_F_table_19_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_19_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_19_0"
+ },
+ {
+ "index": 27,
+ "pair_id": "F_origin_12_F_table_20_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_20_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_20_0"
+ },
+ {
+ "index": 28,
+ "pair_id": "F_origin_12_F_table_21_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_12/F_table_21_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_12",
+ "table_id": "F_table_21_0"
+ },
+ {
+ "index": 29,
+ "pair_id": "F_origin_13_F_table_21_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_13/F_table_21_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_13",
+ "table_id": "F_table_21_0"
+ },
+ {
+ "index": 30,
+ "pair_id": "F_origin_13_F_table_22_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_13/F_table_22_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_13",
+ "table_id": "F_table_22_0"
+ },
+ {
+ "index": 31,
+ "pair_id": "F_origin_13_F_table_23_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_13/F_table_23_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_13",
+ "table_id": "F_table_23_0"
+ },
+ {
+ "index": 32,
+ "pair_id": "F_origin_13_F_table_24_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_13/F_table_24_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_13",
+ "table_id": "F_table_24_0"
+ },
+ {
+ "index": 33,
+ "pair_id": "F_origin_13_F_table_25_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_13/F_table_25_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_13",
+ "table_id": "F_table_25_0"
+ },
+ {
+ "index": 34,
+ "pair_id": "F_origin_14_F_table_26_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_26_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_26_0"
+ },
+ {
+ "index": 35,
+ "pair_id": "F_origin_14_F_table_27_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_27_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_27_0"
+ },
+ {
+ "index": 36,
+ "pair_id": "F_origin_14_F_table_28_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_28_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_28_0"
+ },
+ {
+ "index": 37,
+ "pair_id": "F_origin_14_F_table_29_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_29_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_29_0"
+ },
+ {
+ "index": 38,
+ "pair_id": "F_origin_14_F_table_30_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_30_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_30_0"
+ },
+ {
+ "index": 39,
+ "pair_id": "F_origin_14_F_table_31_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_14/F_table_31_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_14",
+ "table_id": "F_table_31_0"
+ },
+ {
+ "index": 40,
+ "pair_id": "F_origin_15_F_table_32_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_15/F_table_32_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_15",
+ "table_id": "F_table_32_0"
+ },
+ {
+ "index": 41,
+ "pair_id": "F_origin_15_F_table_33_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_15/F_table_33_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_15",
+ "table_id": "F_table_33_0"
+ },
+ {
+ "index": 42,
+ "pair_id": "F_origin_15_F_table_34_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_15/F_table_34_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_15",
+ "table_id": "F_table_34_0"
+ },
+ {
+ "index": 43,
+ "pair_id": "F_origin_15_F_table_35_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_15/F_table_35_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_15",
+ "table_id": "F_table_35_0"
+ },
+ {
+ "index": 44,
+ "pair_id": "F_origin_15_F_table_36_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_15/F_table_36_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_15",
+ "table_id": "F_table_36_0"
+ },
+ {
+ "index": 45,
+ "pair_id": "F_origin_16_F_table_37_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_37_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_37_0"
+ },
+ {
+ "index": 46,
+ "pair_id": "F_origin_16_F_table_38_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_38_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_38_0"
+ },
+ {
+ "index": 47,
+ "pair_id": "F_origin_16_F_table_39_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_39_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_39_0"
+ },
+ {
+ "index": 48,
+ "pair_id": "F_origin_16_F_table_40_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_40_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_40_0"
+ },
+ {
+ "index": 49,
+ "pair_id": "F_origin_16_F_table_41_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_41_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_41_0"
+ },
+ {
+ "index": 50,
+ "pair_id": "F_origin_16_F_table_42_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_42_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_42_0"
+ },
+ {
+ "index": 51,
+ "pair_id": "F_origin_16_F_table_43_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_43_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_43_0"
+ },
+ {
+ "index": 52,
+ "pair_id": "F_origin_16_F_table_44_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_44_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_44_0"
+ },
+ {
+ "index": 53,
+ "pair_id": "F_origin_16_F_table_45_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_16/F_table_45_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_16",
+ "table_id": "F_table_45_0"
+ },
+ {
+ "index": 54,
+ "pair_id": "F_origin_17_F_table_46_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_46_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_46_0"
+ },
+ {
+ "index": 55,
+ "pair_id": "F_origin_17_F_table_47_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_47_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_47_0"
+ },
+ {
+ "index": 56,
+ "pair_id": "F_origin_17_F_table_48_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_48_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_48_0"
+ },
+ {
+ "index": 57,
+ "pair_id": "F_origin_17_F_table_49_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_49_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_49_0"
+ },
+ {
+ "index": 58,
+ "pair_id": "F_origin_17_F_table_50_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_50_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_50_0"
+ },
+ {
+ "index": 59,
+ "pair_id": "F_origin_17_F_table_51_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_51_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_51_0"
+ },
+ {
+ "index": 60,
+ "pair_id": "F_origin_17_F_table_52_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_52_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_52_0"
+ },
+ {
+ "index": 61,
+ "pair_id": "F_origin_17_F_table_53_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_53_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_53_0"
+ },
+ {
+ "index": 62,
+ "pair_id": "F_origin_17_F_table_54_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_17/F_table_54_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_17",
+ "table_id": "F_table_54_0"
+ },
+ {
+ "index": 63,
+ "pair_id": "F_origin_18_F_table_55_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_18/F_table_55_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_18",
+ "table_id": "F_table_55_0"
+ },
+ {
+ "index": 64,
+ "pair_id": "F_origin_18_F_table_56_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_18/F_table_56_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_18",
+ "table_id": "F_table_56_0"
+ },
+ {
+ "index": 65,
+ "pair_id": "F_origin_18_F_table_57_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_18/F_table_57_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_18",
+ "table_id": "F_table_57_0"
+ },
+ {
+ "index": 66,
+ "pair_id": "F_origin_18_F_table_58_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_18/F_table_58_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_18",
+ "table_id": "F_table_58_0"
+ },
+ {
+ "index": 67,
+ "pair_id": "F_origin_19_F_table_59_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_59_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_59_0"
+ },
+ {
+ "index": 68,
+ "pair_id": "F_origin_19_F_table_59_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_59_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_59_1"
+ },
+ {
+ "index": 69,
+ "pair_id": "F_origin_19_F_table_60_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_60_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_60_0"
+ },
+ {
+ "index": 70,
+ "pair_id": "F_origin_19_F_table_61_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_61_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_61_0"
+ },
+ {
+ "index": 71,
+ "pair_id": "F_origin_19_F_table_62_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_62_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_62_0"
+ },
+ {
+ "index": 72,
+ "pair_id": "F_origin_19_F_table_63_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_63_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_63_0"
+ },
+ {
+ "index": 73,
+ "pair_id": "F_origin_19_F_table_64_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_64_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_64_0"
+ },
+ {
+ "index": 74,
+ "pair_id": "F_origin_19_F_table_65_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_65_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_65_0"
+ },
+ {
+ "index": 75,
+ "pair_id": "F_origin_19_F_table_66_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_19/F_table_66_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_19",
+ "table_id": "F_table_66_0"
+ },
+ {
+ "index": 76,
+ "pair_id": "F_origin_2_F_table_3_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_0"
+ },
+ {
+ "index": 77,
+ "pair_id": "F_origin_2_F_table_3_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_1"
+ },
+ {
+ "index": 78,
+ "pair_id": "F_origin_2_F_table_3_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_2"
+ },
+ {
+ "index": 79,
+ "pair_id": "F_origin_2_F_table_3_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_3"
+ },
+ {
+ "index": 80,
+ "pair_id": "F_origin_2_F_table_3_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_4"
+ },
+ {
+ "index": 81,
+ "pair_id": "F_origin_2_F_table_3_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_3_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_3_5"
+ },
+ {
+ "index": 82,
+ "pair_id": "F_origin_2_F_table_4_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_4_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_4_0"
+ },
+ {
+ "index": 83,
+ "pair_id": "F_origin_2_F_table_4_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_2/F_table_4_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_2",
+ "table_id": "F_table_4_1"
+ },
+ {
+ "index": 84,
+ "pair_id": "F_origin_20_F_table_67_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_67_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_67_0"
+ },
+ {
+ "index": 85,
+ "pair_id": "F_origin_20_F_table_67_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_67_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_67_1"
+ },
+ {
+ "index": 86,
+ "pair_id": "F_origin_20_F_table_68_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_0"
+ },
+ {
+ "index": 87,
+ "pair_id": "F_origin_20_F_table_68_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_1"
+ },
+ {
+ "index": 88,
+ "pair_id": "F_origin_20_F_table_68_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_2"
+ },
+ {
+ "index": 89,
+ "pair_id": "F_origin_20_F_table_68_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_3"
+ },
+ {
+ "index": 90,
+ "pair_id": "F_origin_20_F_table_68_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_4"
+ },
+ {
+ "index": 91,
+ "pair_id": "F_origin_20_F_table_68_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_5"
+ },
+ {
+ "index": 92,
+ "pair_id": "F_origin_20_F_table_68_6",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_68_6.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_68_6"
+ },
+ {
+ "index": 93,
+ "pair_id": "F_origin_20_F_table_69_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_20/F_table_69_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_20",
+ "table_id": "F_table_69_0"
+ },
+ {
+ "index": 94,
+ "pair_id": "F_origin_21_F_table_70_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_70_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_21",
+ "table_id": "F_table_70_0"
+ },
+ {
+ "index": 95,
+ "pair_id": "F_origin_21_F_table_70_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_70_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_21",
+ "table_id": "F_table_70_1"
+ },
+ {
+ "index": 96,
+ "pair_id": "F_origin_21_F_table_70_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_70_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_21",
+ "table_id": "F_table_70_2"
+ },
+ {
+ "index": 97,
+ "pair_id": "F_origin_21_F_table_70_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_70_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_21",
+ "table_id": "F_table_70_3"
+ },
+ {
+ "index": 98,
+ "pair_id": "F_origin_21_F_table_71_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_21/F_table_71_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_21",
+ "table_id": "F_table_71_0"
+ },
+ {
+ "index": 99,
+ "pair_id": "F_origin_22_F_table_72_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_0"
+ },
+ {
+ "index": 100,
+ "pair_id": "F_origin_22_F_table_72_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_1"
+ },
+ {
+ "index": 101,
+ "pair_id": "F_origin_22_F_table_72_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_2"
+ },
+ {
+ "index": 102,
+ "pair_id": "F_origin_22_F_table_72_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_3"
+ },
+ {
+ "index": 103,
+ "pair_id": "F_origin_22_F_table_72_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_4"
+ },
+ {
+ "index": 104,
+ "pair_id": "F_origin_22_F_table_72_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_72_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_72_5"
+ },
+ {
+ "index": 105,
+ "pair_id": "F_origin_22_F_table_73_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_22/F_table_73_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_22",
+ "table_id": "F_table_73_0"
+ },
+ {
+ "index": 106,
+ "pair_id": "F_origin_23_F_table_74_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_0"
+ },
+ {
+ "index": 107,
+ "pair_id": "F_origin_23_F_table_74_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_1"
+ },
+ {
+ "index": 108,
+ "pair_id": "F_origin_23_F_table_74_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_2"
+ },
+ {
+ "index": 109,
+ "pair_id": "F_origin_23_F_table_74_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_3"
+ },
+ {
+ "index": 110,
+ "pair_id": "F_origin_23_F_table_74_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_4"
+ },
+ {
+ "index": 111,
+ "pair_id": "F_origin_23_F_table_74_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_74_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_74_5"
+ },
+ {
+ "index": 112,
+ "pair_id": "F_origin_23_F_table_75_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_23/F_table_75_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_23",
+ "table_id": "F_table_75_0"
+ },
+ {
+ "index": 113,
+ "pair_id": "F_origin_24_F_table_75_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_0"
+ },
+ {
+ "index": 114,
+ "pair_id": "F_origin_24_F_table_75_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_1"
+ },
+ {
+ "index": 115,
+ "pair_id": "F_origin_24_F_table_75_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_2"
+ },
+ {
+ "index": 116,
+ "pair_id": "F_origin_24_F_table_75_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_3"
+ },
+ {
+ "index": 117,
+ "pair_id": "F_origin_24_F_table_75_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_4"
+ },
+ {
+ "index": 118,
+ "pair_id": "F_origin_24_F_table_75_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_5"
+ },
+ {
+ "index": 119,
+ "pair_id": "F_origin_24_F_table_75_6",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_6.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_6"
+ },
+ {
+ "index": 120,
+ "pair_id": "F_origin_24_F_table_75_7",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_75_7.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_75_7"
+ },
+ {
+ "index": 121,
+ "pair_id": "F_origin_24_F_table_76_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_76_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_76_0"
+ },
+ {
+ "index": 122,
+ "pair_id": "F_origin_24_F_table_76_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_76_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_76_1"
+ },
+ {
+ "index": 123,
+ "pair_id": "F_origin_24_F_table_76_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_76_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_76_2"
+ },
+ {
+ "index": 124,
+ "pair_id": "F_origin_24_F_table_76_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_76_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_76_3"
+ },
+ {
+ "index": 125,
+ "pair_id": "F_origin_24_F_table_76_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_76_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_76_4"
+ },
+ {
+ "index": 126,
+ "pair_id": "F_origin_24_F_table_77_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_24/F_table_77_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_24",
+ "table_id": "F_table_77_0"
+ },
+ {
+ "index": 127,
+ "pair_id": "F_origin_25_F_table_79_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_79_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_79_0"
+ },
+ {
+ "index": 128,
+ "pair_id": "F_origin_25_F_table_80_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_80_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_80_0"
+ },
+ {
+ "index": 129,
+ "pair_id": "F_origin_25_F_table_81_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_81_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_81_0"
+ },
+ {
+ "index": 130,
+ "pair_id": "F_origin_25_F_table_82_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_82_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_82_0"
+ },
+ {
+ "index": 131,
+ "pair_id": "F_origin_25_F_table_82_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_82_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_82_1"
+ },
+ {
+ "index": 132,
+ "pair_id": "F_origin_25_F_table_82_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_82_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_82_2"
+ },
+ {
+ "index": 133,
+ "pair_id": "F_origin_25_F_table_83_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_25/F_table_83_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_25",
+ "table_id": "F_table_83_0"
+ },
+ {
+ "index": 134,
+ "pair_id": "F_origin_26_F_table_84_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_26/F_table_84_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_26",
+ "table_id": "F_table_84_0"
+ },
+ {
+ "index": 135,
+ "pair_id": "F_origin_26_F_table_84_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_26/F_table_84_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_26",
+ "table_id": "F_table_84_1"
+ },
+ {
+ "index": 136,
+ "pair_id": "F_origin_26_F_table_85_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_26/F_table_85_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_26",
+ "table_id": "F_table_85_0"
+ },
+ {
+ "index": 137,
+ "pair_id": "F_origin_26_F_table_86_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_26/F_table_86_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_26",
+ "table_id": "F_table_86_0"
+ },
+ {
+ "index": 138,
+ "pair_id": "F_origin_26_F_table_87_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_26/F_table_87_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_26",
+ "table_id": "F_table_87_0"
+ },
+ {
+ "index": 139,
+ "pair_id": "F_origin_27_F_table_88_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_88_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_88_0"
+ },
+ {
+ "index": 140,
+ "pair_id": "F_origin_27_F_table_88_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_88_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_88_1"
+ },
+ {
+ "index": 141,
+ "pair_id": "F_origin_27_F_table_89_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_89_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_89_0"
+ },
+ {
+ "index": 142,
+ "pair_id": "F_origin_27_F_table_89_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_89_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_89_1"
+ },
+ {
+ "index": 143,
+ "pair_id": "F_origin_27_F_table_89_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_89_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_89_2"
+ },
+ {
+ "index": 144,
+ "pair_id": "F_origin_27_F_table_90_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_27/F_table_90_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_27",
+ "table_id": "F_table_90_0"
+ },
+ {
+ "index": 145,
+ "pair_id": "F_origin_28_F_table_91_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_28/F_table_91_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_28",
+ "table_id": "F_table_91_0"
+ },
+ {
+ "index": 146,
+ "pair_id": "F_origin_28_F_table_91_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_28/F_table_91_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_28",
+ "table_id": "F_table_91_1"
+ },
+ {
+ "index": 147,
+ "pair_id": "F_origin_28_F_table_91_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_28/F_table_91_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_28",
+ "table_id": "F_table_91_2"
+ },
+ {
+ "index": 148,
+ "pair_id": "F_origin_28_F_table_91_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_28/F_table_91_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_28",
+ "table_id": "F_table_91_3"
+ },
+ {
+ "index": 149,
+ "pair_id": "F_origin_28_F_table_92_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_28/F_table_92_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_28",
+ "table_id": "F_table_92_0"
+ },
+ {
+ "index": 150,
+ "pair_id": "F_origin_29_F_table_93_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_93_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_93_0"
+ },
+ {
+ "index": 151,
+ "pair_id": "F_origin_29_F_table_93_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_93_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_93_1"
+ },
+ {
+ "index": 152,
+ "pair_id": "F_origin_29_F_table_93_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_93_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_93_2"
+ },
+ {
+ "index": 153,
+ "pair_id": "F_origin_29_F_table_93_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_93_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_93_3"
+ },
+ {
+ "index": 154,
+ "pair_id": "F_origin_29_F_table_93_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_93_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_93_4"
+ },
+ {
+ "index": 155,
+ "pair_id": "F_origin_29_F_table_94_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_29/F_table_94_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_29",
+ "table_id": "F_table_94_0"
+ },
+ {
+ "index": 156,
+ "pair_id": "F_origin_3_F_table_5_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_0"
+ },
+ {
+ "index": 157,
+ "pair_id": "F_origin_3_F_table_5_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_1"
+ },
+ {
+ "index": 158,
+ "pair_id": "F_origin_3_F_table_5_10",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_10.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_10"
+ },
+ {
+ "index": 159,
+ "pair_id": "F_origin_3_F_table_5_11",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_11.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_11"
+ },
+ {
+ "index": 160,
+ "pair_id": "F_origin_3_F_table_5_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_2"
+ },
+ {
+ "index": 161,
+ "pair_id": "F_origin_3_F_table_5_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_3"
+ },
+ {
+ "index": 162,
+ "pair_id": "F_origin_3_F_table_5_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_4"
+ },
+ {
+ "index": 163,
+ "pair_id": "F_origin_3_F_table_5_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_5"
+ },
+ {
+ "index": 164,
+ "pair_id": "F_origin_3_F_table_5_6",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_6.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_6"
+ },
+ {
+ "index": 165,
+ "pair_id": "F_origin_3_F_table_5_7",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_7.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_7"
+ },
+ {
+ "index": 166,
+ "pair_id": "F_origin_3_F_table_5_8",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_8.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_8"
+ },
+ {
+ "index": 167,
+ "pair_id": "F_origin_3_F_table_5_9",
+ "image_paths": [
+ "data/Finance/Table/F_origin_3/F_table_5_9.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_3",
+ "table_id": "F_table_5_9"
+ },
+ {
+ "index": 168,
+ "pair_id": "F_origin_30_F_table_95_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_30/F_table_95_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_30",
+ "table_id": "F_table_95_0"
+ },
+ {
+ "index": 169,
+ "pair_id": "F_origin_30_F_table_96_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_30/F_table_96_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_30",
+ "table_id": "F_table_96_0"
+ },
+ {
+ "index": 170,
+ "pair_id": "F_origin_30_F_table_97_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_30/F_table_97_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_30",
+ "table_id": "F_table_97_0"
+ },
+ {
+ "index": 171,
+ "pair_id": "F_origin_30_F_table_97_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_30/F_table_97_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_30",
+ "table_id": "F_table_97_1"
+ },
+ {
+ "index": 172,
+ "pair_id": "F_origin_30_F_table_97_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_30/F_table_97_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_30",
+ "table_id": "F_table_97_2"
+ },
+ {
+ "index": 173,
+ "pair_id": "F_origin_31_F_table_100_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_31/F_table_100_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_31",
+ "table_id": "F_table_100_0"
+ },
+ {
+ "index": 174,
+ "pair_id": "F_origin_31_F_table_100_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_31/F_table_100_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_31",
+ "table_id": "F_table_100_1"
+ },
+ {
+ "index": 175,
+ "pair_id": "F_origin_31_F_table_100_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_31/F_table_100_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_31",
+ "table_id": "F_table_100_2"
+ },
+ {
+ "index": 176,
+ "pair_id": "F_origin_31_F_table_98_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_31/F_table_98_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_31",
+ "table_id": "F_table_98_0"
+ },
+ {
+ "index": 177,
+ "pair_id": "F_origin_31_F_table_99_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_31/F_table_99_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_31",
+ "table_id": "F_table_99_0"
+ },
+ {
+ "index": 178,
+ "pair_id": "F_origin_32_F_table_101_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_32/F_table_101_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_32",
+ "table_id": "F_table_101_0"
+ },
+ {
+ "index": 179,
+ "pair_id": "F_origin_32_F_table_102_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_32/F_table_102_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_32",
+ "table_id": "F_table_102_0"
+ },
+ {
+ "index": 180,
+ "pair_id": "F_origin_32_F_table_103_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_32/F_table_103_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_32",
+ "table_id": "F_table_103_0"
+ },
+ {
+ "index": 181,
+ "pair_id": "F_origin_32_F_table_103_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_32/F_table_103_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_32",
+ "table_id": "F_table_103_1"
+ },
+ {
+ "index": 182,
+ "pair_id": "F_origin_32_F_table_103_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_32/F_table_103_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_32",
+ "table_id": "F_table_103_2"
+ },
+ {
+ "index": 183,
+ "pair_id": "F_origin_33_F_table_104_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_33/F_table_104_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_33",
+ "table_id": "F_table_104_0"
+ },
+ {
+ "index": 184,
+ "pair_id": "F_origin_33_F_table_105_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_33/F_table_105_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_33",
+ "table_id": "F_table_105_0"
+ },
+ {
+ "index": 185,
+ "pair_id": "F_origin_33_F_table_106_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_33/F_table_106_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_33",
+ "table_id": "F_table_106_0"
+ },
+ {
+ "index": 186,
+ "pair_id": "F_origin_33_F_table_106_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_33/F_table_106_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_33",
+ "table_id": "F_table_106_1"
+ },
+ {
+ "index": 187,
+ "pair_id": "F_origin_33_F_table_106_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_33/F_table_106_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_33",
+ "table_id": "F_table_106_2"
+ },
+ {
+ "index": 188,
+ "pair_id": "F_origin_34_F_table_107_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_34/F_table_107_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_34",
+ "table_id": "F_table_107_0"
+ },
+ {
+ "index": 189,
+ "pair_id": "F_origin_34_F_table_108_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_34/F_table_108_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_34",
+ "table_id": "F_table_108_0"
+ },
+ {
+ "index": 190,
+ "pair_id": "F_origin_34_F_table_109_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_34/F_table_109_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_34",
+ "table_id": "F_table_109_0"
+ },
+ {
+ "index": 191,
+ "pair_id": "F_origin_34_F_table_109_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_34/F_table_109_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_34",
+ "table_id": "F_table_109_1"
+ },
+ {
+ "index": 192,
+ "pair_id": "F_origin_34_F_table_109_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_34/F_table_109_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_34",
+ "table_id": "F_table_109_2"
+ },
+ {
+ "index": 193,
+ "pair_id": "F_origin_35_F_table_110_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_35/F_table_110_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_35",
+ "table_id": "F_table_110_0"
+ },
+ {
+ "index": 194,
+ "pair_id": "F_origin_35_F_table_111_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_35/F_table_111_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_35",
+ "table_id": "F_table_111_0"
+ },
+ {
+ "index": 195,
+ "pair_id": "F_origin_35_F_table_112_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_35/F_table_112_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_35",
+ "table_id": "F_table_112_0"
+ },
+ {
+ "index": 196,
+ "pair_id": "F_origin_35_F_table_112_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_35/F_table_112_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_35",
+ "table_id": "F_table_112_1"
+ },
+ {
+ "index": 197,
+ "pair_id": "F_origin_35_F_table_112_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_35/F_table_112_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_35",
+ "table_id": "F_table_112_2"
+ },
+ {
+ "index": 198,
+ "pair_id": "F_origin_36_F_table_113_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_113_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_113_0"
+ },
+ {
+ "index": 199,
+ "pair_id": "F_origin_36_F_table_114_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_114_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_114_0"
+ },
+ {
+ "index": 200,
+ "pair_id": "F_origin_36_F_table_115_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_115_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_115_0"
+ },
+ {
+ "index": 201,
+ "pair_id": "F_origin_36_F_table_115_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_115_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_115_1"
+ },
+ {
+ "index": 202,
+ "pair_id": "F_origin_36_F_table_115_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_115_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_115_2"
+ },
+ {
+ "index": 203,
+ "pair_id": "F_origin_36_F_table_116_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_36/F_table_116_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_36",
+ "table_id": "F_table_116_0"
+ },
+ {
+ "index": 204,
+ "pair_id": "F_origin_37_F_table_117_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_117_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_117_0"
+ },
+ {
+ "index": 205,
+ "pair_id": "F_origin_37_F_table_118_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_118_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_118_0"
+ },
+ {
+ "index": 206,
+ "pair_id": "F_origin_37_F_table_119_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_119_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_119_0"
+ },
+ {
+ "index": 207,
+ "pair_id": "F_origin_37_F_table_119_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_119_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_119_1"
+ },
+ {
+ "index": 208,
+ "pair_id": "F_origin_37_F_table_119_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_119_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_119_2"
+ },
+ {
+ "index": 209,
+ "pair_id": "F_origin_37_F_table_119_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_119_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_119_3"
+ },
+ {
+ "index": 210,
+ "pair_id": "F_origin_37_F_table_119_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_37/F_table_119_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_37",
+ "table_id": "F_table_119_4"
+ },
+ {
+ "index": 211,
+ "pair_id": "F_origin_38_F_table_120_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_120_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_38",
+ "table_id": "F_table_120_0"
+ },
+ {
+ "index": 212,
+ "pair_id": "F_origin_38_F_table_121_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_121_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_38",
+ "table_id": "F_table_121_0"
+ },
+ {
+ "index": 213,
+ "pair_id": "F_origin_38_F_table_122_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_122_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_38",
+ "table_id": "F_table_122_0"
+ },
+ {
+ "index": 214,
+ "pair_id": "F_origin_38_F_table_122_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_122_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_38",
+ "table_id": "F_table_122_1"
+ },
+ {
+ "index": 215,
+ "pair_id": "F_origin_38_F_table_122_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_38/F_table_122_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_38",
+ "table_id": "F_table_122_2"
+ },
+ {
+ "index": 216,
+ "pair_id": "F_origin_39_F_table_123_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_39/F_table_123_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_39",
+ "table_id": "F_table_123_0"
+ },
+ {
+ "index": 217,
+ "pair_id": "F_origin_39_F_table_124_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_39/F_table_124_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_39",
+ "table_id": "F_table_124_0"
+ },
+ {
+ "index": 218,
+ "pair_id": "F_origin_39_F_table_125_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_39/F_table_125_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_39",
+ "table_id": "F_table_125_0"
+ },
+ {
+ "index": 219,
+ "pair_id": "F_origin_39_F_table_125_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_39/F_table_125_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_39",
+ "table_id": "F_table_125_1"
+ },
+ {
+ "index": 220,
+ "pair_id": "F_origin_39_F_table_125_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_39/F_table_125_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_39",
+ "table_id": "F_table_125_2"
+ },
+ {
+ "index": 221,
+ "pair_id": "F_origin_4_F_table_6_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_4",
+ "table_id": "F_table_6_0"
+ },
+ {
+ "index": 222,
+ "pair_id": "F_origin_4_F_table_6_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_4",
+ "table_id": "F_table_6_1"
+ },
+ {
+ "index": 223,
+ "pair_id": "F_origin_4_F_table_6_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_4",
+ "table_id": "F_table_6_2"
+ },
+ {
+ "index": 224,
+ "pair_id": "F_origin_4_F_table_6_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_4/F_table_6_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_4",
+ "table_id": "F_table_6_3"
+ },
+ {
+ "index": 225,
+ "pair_id": "F_origin_40_F_table_126_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_40/F_table_126_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_40",
+ "table_id": "F_table_126_0"
+ },
+ {
+ "index": 226,
+ "pair_id": "F_origin_40_F_table_127_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_40/F_table_127_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_40",
+ "table_id": "F_table_127_0"
+ },
+ {
+ "index": 227,
+ "pair_id": "F_origin_40_F_table_128_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_40/F_table_128_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_40",
+ "table_id": "F_table_128_0"
+ },
+ {
+ "index": 228,
+ "pair_id": "F_origin_40_F_table_128_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_40/F_table_128_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_40",
+ "table_id": "F_table_128_1"
+ },
+ {
+ "index": 229,
+ "pair_id": "F_origin_40_F_table_128_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_40/F_table_128_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_40",
+ "table_id": "F_table_128_2"
+ },
+ {
+ "index": 230,
+ "pair_id": "F_origin_41_F_table_129_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_0"
+ },
+ {
+ "index": 231,
+ "pair_id": "F_origin_41_F_table_129_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_1"
+ },
+ {
+ "index": 232,
+ "pair_id": "F_origin_41_F_table_129_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_2"
+ },
+ {
+ "index": 233,
+ "pair_id": "F_origin_41_F_table_129_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_3"
+ },
+ {
+ "index": 234,
+ "pair_id": "F_origin_41_F_table_129_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_4"
+ },
+ {
+ "index": 235,
+ "pair_id": "F_origin_41_F_table_129_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_41/F_table_129_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_41",
+ "table_id": "F_table_129_5"
+ },
+ {
+ "index": 236,
+ "pair_id": "F_origin_42_F_table_130_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_0"
+ },
+ {
+ "index": 237,
+ "pair_id": "F_origin_42_F_table_130_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_1"
+ },
+ {
+ "index": 238,
+ "pair_id": "F_origin_42_F_table_130_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_2"
+ },
+ {
+ "index": 239,
+ "pair_id": "F_origin_42_F_table_130_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_3"
+ },
+ {
+ "index": 240,
+ "pair_id": "F_origin_42_F_table_130_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_4"
+ },
+ {
+ "index": 241,
+ "pair_id": "F_origin_42_F_table_130_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_42/F_table_130_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_42",
+ "table_id": "F_table_130_5"
+ },
+ {
+ "index": 242,
+ "pair_id": "F_origin_43_F_table_131_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_43",
+ "table_id": "F_table_131_0"
+ },
+ {
+ "index": 243,
+ "pair_id": "F_origin_43_F_table_131_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_43",
+ "table_id": "F_table_131_1"
+ },
+ {
+ "index": 244,
+ "pair_id": "F_origin_43_F_table_131_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_43",
+ "table_id": "F_table_131_2"
+ },
+ {
+ "index": 245,
+ "pair_id": "F_origin_43_F_table_131_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_43",
+ "table_id": "F_table_131_3"
+ },
+ {
+ "index": 246,
+ "pair_id": "F_origin_43_F_table_131_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_43/F_table_131_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_43",
+ "table_id": "F_table_131_4"
+ },
+ {
+ "index": 247,
+ "pair_id": "F_origin_44_F_table_132_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_132_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_132_0"
+ },
+ {
+ "index": 248,
+ "pair_id": "F_origin_44_F_table_132_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_132_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_132_1"
+ },
+ {
+ "index": 249,
+ "pair_id": "F_origin_44_F_table_133_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_133_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_133_0"
+ },
+ {
+ "index": 250,
+ "pair_id": "F_origin_44_F_table_133_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_133_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_133_1"
+ },
+ {
+ "index": 251,
+ "pair_id": "F_origin_44_F_table_133_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_133_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_133_2"
+ },
+ {
+ "index": 252,
+ "pair_id": "F_origin_44_F_table_134_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_134_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_134_0"
+ },
+ {
+ "index": 253,
+ "pair_id": "F_origin_44_F_table_135_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_135_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_135_0"
+ },
+ {
+ "index": 254,
+ "pair_id": "F_origin_44_F_table_135_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_135_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_135_1"
+ },
+ {
+ "index": 255,
+ "pair_id": "F_origin_44_F_table_135_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_44/F_table_135_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_44",
+ "table_id": "F_table_135_2"
+ },
+ {
+ "index": 256,
+ "pair_id": "F_origin_45_F_table_136_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_0"
+ },
+ {
+ "index": 257,
+ "pair_id": "F_origin_45_F_table_136_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_1"
+ },
+ {
+ "index": 258,
+ "pair_id": "F_origin_45_F_table_136_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_2"
+ },
+ {
+ "index": 259,
+ "pair_id": "F_origin_45_F_table_136_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_3"
+ },
+ {
+ "index": 260,
+ "pair_id": "F_origin_45_F_table_136_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_4"
+ },
+ {
+ "index": 261,
+ "pair_id": "F_origin_45_F_table_136_5",
+ "image_paths": [
+ "data/Finance/Table/F_origin_45/F_table_136_5.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_45",
+ "table_id": "F_table_136_5"
+ },
+ {
+ "index": 262,
+ "pair_id": "F_origin_46_F_table_137_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_137_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_137_0"
+ },
+ {
+ "index": 263,
+ "pair_id": "F_origin_46_F_table_137_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_137_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_137_1"
+ },
+ {
+ "index": 264,
+ "pair_id": "F_origin_46_F_table_138_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_138_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_138_0"
+ },
+ {
+ "index": 265,
+ "pair_id": "F_origin_46_F_table_138_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_138_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_138_1"
+ },
+ {
+ "index": 266,
+ "pair_id": "F_origin_46_F_table_139_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_139_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_139_0"
+ },
+ {
+ "index": 267,
+ "pair_id": "F_origin_46_F_table_139_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_46/F_table_139_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_46",
+ "table_id": "F_table_139_1"
+ },
+ {
+ "index": 268,
+ "pair_id": "F_origin_47_F_table_140_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_140_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_140_0"
+ },
+ {
+ "index": 269,
+ "pair_id": "F_origin_47_F_table_141_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_141_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_141_0"
+ },
+ {
+ "index": 270,
+ "pair_id": "F_origin_47_F_table_141_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_141_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_141_1"
+ },
+ {
+ "index": 271,
+ "pair_id": "F_origin_47_F_table_142_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_142_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_142_0"
+ },
+ {
+ "index": 272,
+ "pair_id": "F_origin_47_F_table_142_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_142_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_142_1"
+ },
+ {
+ "index": 273,
+ "pair_id": "F_origin_47_F_table_142_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_142_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_142_2"
+ },
+ {
+ "index": 274,
+ "pair_id": "F_origin_47_F_table_142_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_47/F_table_142_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_47",
+ "table_id": "F_table_142_3"
+ },
+ {
+ "index": 275,
+ "pair_id": "F_origin_48_F_table_143_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_48/F_table_143_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_48",
+ "table_id": "F_table_143_0"
+ },
+ {
+ "index": 276,
+ "pair_id": "F_origin_48_F_table_144_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_48/F_table_144_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_48",
+ "table_id": "F_table_144_0"
+ },
+ {
+ "index": 277,
+ "pair_id": "F_origin_48_F_table_144_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_48/F_table_144_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_48",
+ "table_id": "F_table_144_1"
+ },
+ {
+ "index": 278,
+ "pair_id": "F_origin_48_F_table_145_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_48/F_table_145_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_48",
+ "table_id": "F_table_145_0"
+ },
+ {
+ "index": 279,
+ "pair_id": "F_origin_48_F_table_145_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_48/F_table_145_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_48",
+ "table_id": "F_table_145_1"
+ },
+ {
+ "index": 280,
+ "pair_id": "F_origin_49_F_table_146_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_146_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_146_0"
+ },
+ {
+ "index": 281,
+ "pair_id": "F_origin_49_F_table_147_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_147_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_147_0"
+ },
+ {
+ "index": 282,
+ "pair_id": "F_origin_49_F_table_147_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_147_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_147_1"
+ },
+ {
+ "index": 283,
+ "pair_id": "F_origin_49_F_table_147_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_147_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_147_2"
+ },
+ {
+ "index": 284,
+ "pair_id": "F_origin_49_F_table_148_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_148_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_148_0"
+ },
+ {
+ "index": 285,
+ "pair_id": "F_origin_49_F_table_148_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_49/F_table_148_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_49",
+ "table_id": "F_table_148_1"
+ },
+ {
+ "index": 286,
+ "pair_id": "F_origin_5_F_table_7_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_5/F_table_7_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_5",
+ "table_id": "F_table_7_0"
+ },
+ {
+ "index": 287,
+ "pair_id": "F_origin_5_F_table_7_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_5/F_table_7_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_5",
+ "table_id": "F_table_7_1"
+ },
+ {
+ "index": 288,
+ "pair_id": "F_origin_5_F_table_7_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_5/F_table_7_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_5",
+ "table_id": "F_table_7_2"
+ },
+ {
+ "index": 289,
+ "pair_id": "F_origin_5_F_table_7_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_5/F_table_7_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_5",
+ "table_id": "F_table_7_3"
+ },
+ {
+ "index": 290,
+ "pair_id": "F_origin_5_F_table_7_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_5/F_table_7_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_5",
+ "table_id": "F_table_7_4"
+ },
+ {
+ "index": 291,
+ "pair_id": "F_origin_6_F_table_8_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_8_0"
+ },
+ {
+ "index": 292,
+ "pair_id": "F_origin_6_F_table_8_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_8_1"
+ },
+ {
+ "index": 293,
+ "pair_id": "F_origin_6_F_table_8_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_8_2"
+ },
+ {
+ "index": 294,
+ "pair_id": "F_origin_6_F_table_8_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_8_3"
+ },
+ {
+ "index": 295,
+ "pair_id": "F_origin_6_F_table_8_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_8_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_8_4"
+ },
+ {
+ "index": 296,
+ "pair_id": "F_origin_6_F_table_9_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_9_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_9_0"
+ },
+ {
+ "index": 297,
+ "pair_id": "F_origin_6_F_table_9_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_6/F_table_9_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_6",
+ "table_id": "F_table_9_1"
+ },
+ {
+ "index": 298,
+ "pair_id": "F_origin_7_F_table_10_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_10_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_10_0"
+ },
+ {
+ "index": 299,
+ "pair_id": "F_origin_7_F_table_10_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_10_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_10_1"
+ },
+ {
+ "index": 300,
+ "pair_id": "F_origin_7_F_table_10_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_10_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_10_2"
+ },
+ {
+ "index": 301,
+ "pair_id": "F_origin_7_F_table_10_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_10_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_10_3"
+ },
+ {
+ "index": 302,
+ "pair_id": "F_origin_7_F_table_10_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_10_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_10_4"
+ },
+ {
+ "index": 303,
+ "pair_id": "F_origin_7_F_table_11_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_11_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_11_0"
+ },
+ {
+ "index": 304,
+ "pair_id": "F_origin_7_F_table_11_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_7/F_table_11_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_7",
+ "table_id": "F_table_11_1"
+ },
+ {
+ "index": 305,
+ "pair_id": "F_origin_8_F_table_12_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_8/F_table_12_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_8",
+ "table_id": "F_table_12_0"
+ },
+ {
+ "index": 306,
+ "pair_id": "F_origin_8_F_table_12_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_8/F_table_12_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_8",
+ "table_id": "F_table_12_1"
+ },
+ {
+ "index": 307,
+ "pair_id": "F_origin_8_F_table_12_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_8/F_table_12_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_8",
+ "table_id": "F_table_12_2"
+ },
+ {
+ "index": 308,
+ "pair_id": "F_origin_8_F_table_12_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_8/F_table_12_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_8",
+ "table_id": "F_table_12_3"
+ },
+ {
+ "index": 309,
+ "pair_id": "F_origin_8_F_table_12_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_8/F_table_12_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_8",
+ "table_id": "F_table_12_4"
+ },
+ {
+ "index": 310,
+ "pair_id": "F_origin_9_F_table_13_0",
+ "image_paths": [
+ "data/Finance/Table/F_origin_9/F_table_13_0.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_9",
+ "table_id": "F_table_13_0"
+ },
+ {
+ "index": 311,
+ "pair_id": "F_origin_9_F_table_13_1",
+ "image_paths": [
+ "data/Finance/Table/F_origin_9/F_table_13_1.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_9",
+ "table_id": "F_table_13_1"
+ },
+ {
+ "index": 312,
+ "pair_id": "F_origin_9_F_table_13_2",
+ "image_paths": [
+ "data/Finance/Table/F_origin_9/F_table_13_2.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_9",
+ "table_id": "F_table_13_2"
+ },
+ {
+ "index": 313,
+ "pair_id": "F_origin_9_F_table_13_3",
+ "image_paths": [
+ "data/Finance/Table/F_origin_9/F_table_13_3.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_9",
+ "table_id": "F_table_13_3"
+ },
+ {
+ "index": 314,
+ "pair_id": "F_origin_9_F_table_13_4",
+ "image_paths": [
+ "data/Finance/Table/F_origin_9/F_table_13_4.png"
+ ],
+ "domain": "Finance",
+ "origin": "F_origin_9",
+ "table_id": "F_table_13_4"
+ }
+]
\ No newline at end of file
diff --git a/single_image_json_list/single_table_insurance_input.json b/single_image_json_list/single_table_insurance_input.json
new file mode 100644
index 0000000..73dc561
--- /dev/null
+++ b/single_image_json_list/single_table_insurance_input.json
@@ -0,0 +1,1572 @@
+[
+ {
+ "index": 0,
+ "pair_id": "I_origin_0_I_table_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "I_origin_0_I_table_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_1"
+ },
+ {
+ "index": 2,
+ "pair_id": "I_origin_0_I_table_10",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_10.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_10"
+ },
+ {
+ "index": 3,
+ "pair_id": "I_origin_0_I_table_11",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_11.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_11"
+ },
+ {
+ "index": 4,
+ "pair_id": "I_origin_0_I_table_12_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_12_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_12_0"
+ },
+ {
+ "index": 5,
+ "pair_id": "I_origin_0_I_table_12_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_12_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_12_1"
+ },
+ {
+ "index": 6,
+ "pair_id": "I_origin_0_I_table_13",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_13.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_13"
+ },
+ {
+ "index": 7,
+ "pair_id": "I_origin_0_I_table_14",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_14.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_14"
+ },
+ {
+ "index": 8,
+ "pair_id": "I_origin_0_I_table_15",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_15.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_15"
+ },
+ {
+ "index": 9,
+ "pair_id": "I_origin_0_I_table_16",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_16.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_16"
+ },
+ {
+ "index": 10,
+ "pair_id": "I_origin_0_I_table_17",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_17.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_17"
+ },
+ {
+ "index": 11,
+ "pair_id": "I_origin_0_I_table_18",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_18.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_18"
+ },
+ {
+ "index": 12,
+ "pair_id": "I_origin_0_I_table_19_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_0"
+ },
+ {
+ "index": 13,
+ "pair_id": "I_origin_0_I_table_19_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_1"
+ },
+ {
+ "index": 14,
+ "pair_id": "I_origin_0_I_table_19_10",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_10.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_10"
+ },
+ {
+ "index": 15,
+ "pair_id": "I_origin_0_I_table_19_11",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_11.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_11"
+ },
+ {
+ "index": 16,
+ "pair_id": "I_origin_0_I_table_19_12",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_12.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_12"
+ },
+ {
+ "index": 17,
+ "pair_id": "I_origin_0_I_table_19_13",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_13.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_13"
+ },
+ {
+ "index": 18,
+ "pair_id": "I_origin_0_I_table_19_14",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_14.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_14"
+ },
+ {
+ "index": 19,
+ "pair_id": "I_origin_0_I_table_19_15",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_15.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_15"
+ },
+ {
+ "index": 20,
+ "pair_id": "I_origin_0_I_table_19_16",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_16.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_16"
+ },
+ {
+ "index": 21,
+ "pair_id": "I_origin_0_I_table_19_17",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_17.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_17"
+ },
+ {
+ "index": 22,
+ "pair_id": "I_origin_0_I_table_19_18",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_18.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_18"
+ },
+ {
+ "index": 23,
+ "pair_id": "I_origin_0_I_table_19_19",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_19.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_19"
+ },
+ {
+ "index": 24,
+ "pair_id": "I_origin_0_I_table_19_2",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_2.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_2"
+ },
+ {
+ "index": 25,
+ "pair_id": "I_origin_0_I_table_19_20",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_20.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_20"
+ },
+ {
+ "index": 26,
+ "pair_id": "I_origin_0_I_table_19_21",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_21.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_21"
+ },
+ {
+ "index": 27,
+ "pair_id": "I_origin_0_I_table_19_3",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_3.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_3"
+ },
+ {
+ "index": 28,
+ "pair_id": "I_origin_0_I_table_19_4",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_4.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_4"
+ },
+ {
+ "index": 29,
+ "pair_id": "I_origin_0_I_table_19_5",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_5.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_5"
+ },
+ {
+ "index": 30,
+ "pair_id": "I_origin_0_I_table_19_6",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_6.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_6"
+ },
+ {
+ "index": 31,
+ "pair_id": "I_origin_0_I_table_19_7",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_7.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_7"
+ },
+ {
+ "index": 32,
+ "pair_id": "I_origin_0_I_table_19_8",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_8.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_8"
+ },
+ {
+ "index": 33,
+ "pair_id": "I_origin_0_I_table_19_9",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_19_9.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_19_9"
+ },
+ {
+ "index": 34,
+ "pair_id": "I_origin_0_I_table_2",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_2.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_2"
+ },
+ {
+ "index": 35,
+ "pair_id": "I_origin_0_I_table_20_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_0"
+ },
+ {
+ "index": 36,
+ "pair_id": "I_origin_0_I_table_20_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_1"
+ },
+ {
+ "index": 37,
+ "pair_id": "I_origin_0_I_table_20_2",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_2.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_2"
+ },
+ {
+ "index": 38,
+ "pair_id": "I_origin_0_I_table_20_3",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_3.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_3"
+ },
+ {
+ "index": 39,
+ "pair_id": "I_origin_0_I_table_20_4",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_4.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_4"
+ },
+ {
+ "index": 40,
+ "pair_id": "I_origin_0_I_table_20_5",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_5.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_5"
+ },
+ {
+ "index": 41,
+ "pair_id": "I_origin_0_I_table_20_6",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_6.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_6"
+ },
+ {
+ "index": 42,
+ "pair_id": "I_origin_0_I_table_20_7",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_7.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_7"
+ },
+ {
+ "index": 43,
+ "pair_id": "I_origin_0_I_table_20_8",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_8.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_8"
+ },
+ {
+ "index": 44,
+ "pair_id": "I_origin_0_I_table_20_9",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_20_9.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_20_9"
+ },
+ {
+ "index": 45,
+ "pair_id": "I_origin_0_I_table_21",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_21.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_21"
+ },
+ {
+ "index": 46,
+ "pair_id": "I_origin_0_I_table_22",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_22.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_22"
+ },
+ {
+ "index": 47,
+ "pair_id": "I_origin_0_I_table_23",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_23.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_23"
+ },
+ {
+ "index": 48,
+ "pair_id": "I_origin_0_I_table_24",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_24.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_24"
+ },
+ {
+ "index": 49,
+ "pair_id": "I_origin_0_I_table_25",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_25.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_25"
+ },
+ {
+ "index": 50,
+ "pair_id": "I_origin_0_I_table_26",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_26.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_26"
+ },
+ {
+ "index": 51,
+ "pair_id": "I_origin_0_I_table_27",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_27.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_27"
+ },
+ {
+ "index": 52,
+ "pair_id": "I_origin_0_I_table_28",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_28.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_28"
+ },
+ {
+ "index": 53,
+ "pair_id": "I_origin_0_I_table_29",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_29.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_29"
+ },
+ {
+ "index": 54,
+ "pair_id": "I_origin_0_I_table_3",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_3.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_3"
+ },
+ {
+ "index": 55,
+ "pair_id": "I_origin_0_I_table_30",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_30.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_30"
+ },
+ {
+ "index": 56,
+ "pair_id": "I_origin_0_I_table_31",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_31.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_31"
+ },
+ {
+ "index": 57,
+ "pair_id": "I_origin_0_I_table_32",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_32.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_32"
+ },
+ {
+ "index": 58,
+ "pair_id": "I_origin_0_I_table_33",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_33.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_33"
+ },
+ {
+ "index": 59,
+ "pair_id": "I_origin_0_I_table_34",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_34.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_34"
+ },
+ {
+ "index": 60,
+ "pair_id": "I_origin_0_I_table_35",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_35.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_35"
+ },
+ {
+ "index": 61,
+ "pair_id": "I_origin_0_I_table_36",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_36.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_36"
+ },
+ {
+ "index": 62,
+ "pair_id": "I_origin_0_I_table_37",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_37.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_37"
+ },
+ {
+ "index": 63,
+ "pair_id": "I_origin_0_I_table_38",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_38.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_38"
+ },
+ {
+ "index": 64,
+ "pair_id": "I_origin_0_I_table_39",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_39.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_39"
+ },
+ {
+ "index": 65,
+ "pair_id": "I_origin_0_I_table_4",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_4.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_4"
+ },
+ {
+ "index": 66,
+ "pair_id": "I_origin_0_I_table_40_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_40_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_40_0"
+ },
+ {
+ "index": 67,
+ "pair_id": "I_origin_0_I_table_40_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_40_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_40_1"
+ },
+ {
+ "index": 68,
+ "pair_id": "I_origin_0_I_table_41",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_41.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_41"
+ },
+ {
+ "index": 69,
+ "pair_id": "I_origin_0_I_table_42",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_42.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_42"
+ },
+ {
+ "index": 70,
+ "pair_id": "I_origin_0_I_table_43",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_43.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_43"
+ },
+ {
+ "index": 71,
+ "pair_id": "I_origin_0_I_table_44",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_44.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_44"
+ },
+ {
+ "index": 72,
+ "pair_id": "I_origin_0_I_table_45",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_45.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_45"
+ },
+ {
+ "index": 73,
+ "pair_id": "I_origin_0_I_table_46",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_46.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_46"
+ },
+ {
+ "index": 74,
+ "pair_id": "I_origin_0_I_table_47",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_47.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_47"
+ },
+ {
+ "index": 75,
+ "pair_id": "I_origin_0_I_table_48",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_48.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_48"
+ },
+ {
+ "index": 76,
+ "pair_id": "I_origin_0_I_table_49",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_49.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_49"
+ },
+ {
+ "index": 77,
+ "pair_id": "I_origin_0_I_table_5",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_5.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_5"
+ },
+ {
+ "index": 78,
+ "pair_id": "I_origin_0_I_table_50",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_50.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_50"
+ },
+ {
+ "index": 79,
+ "pair_id": "I_origin_0_I_table_51",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_51.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_51"
+ },
+ {
+ "index": 80,
+ "pair_id": "I_origin_0_I_table_52",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_52.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_52"
+ },
+ {
+ "index": 81,
+ "pair_id": "I_origin_0_I_table_53",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_53.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_53"
+ },
+ {
+ "index": 82,
+ "pair_id": "I_origin_0_I_table_54",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_54.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_54"
+ },
+ {
+ "index": 83,
+ "pair_id": "I_origin_0_I_table_55",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_55.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_55"
+ },
+ {
+ "index": 84,
+ "pair_id": "I_origin_0_I_table_56",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_56.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_56"
+ },
+ {
+ "index": 85,
+ "pair_id": "I_origin_0_I_table_57",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_57.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_57"
+ },
+ {
+ "index": 86,
+ "pair_id": "I_origin_0_I_table_58",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_58.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_58"
+ },
+ {
+ "index": 87,
+ "pair_id": "I_origin_0_I_table_59",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_59.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_59"
+ },
+ {
+ "index": 88,
+ "pair_id": "I_origin_0_I_table_6",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_6.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_6"
+ },
+ {
+ "index": 89,
+ "pair_id": "I_origin_0_I_table_60",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_60.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_60"
+ },
+ {
+ "index": 90,
+ "pair_id": "I_origin_0_I_table_61",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_61.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_61"
+ },
+ {
+ "index": 91,
+ "pair_id": "I_origin_0_I_table_62",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_62.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_62"
+ },
+ {
+ "index": 92,
+ "pair_id": "I_origin_0_I_table_63",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_63.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_63"
+ },
+ {
+ "index": 93,
+ "pair_id": "I_origin_0_I_table_64_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_0"
+ },
+ {
+ "index": 94,
+ "pair_id": "I_origin_0_I_table_64_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_1"
+ },
+ {
+ "index": 95,
+ "pair_id": "I_origin_0_I_table_64_10",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_10.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_10"
+ },
+ {
+ "index": 96,
+ "pair_id": "I_origin_0_I_table_64_11",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_11.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_11"
+ },
+ {
+ "index": 97,
+ "pair_id": "I_origin_0_I_table_64_12",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_12.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_12"
+ },
+ {
+ "index": 98,
+ "pair_id": "I_origin_0_I_table_64_13",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_13.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_13"
+ },
+ {
+ "index": 99,
+ "pair_id": "I_origin_0_I_table_64_14",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_14.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_14"
+ },
+ {
+ "index": 100,
+ "pair_id": "I_origin_0_I_table_64_15",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_15.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_15"
+ },
+ {
+ "index": 101,
+ "pair_id": "I_origin_0_I_table_64_16",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_16.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_16"
+ },
+ {
+ "index": 102,
+ "pair_id": "I_origin_0_I_table_64_17",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_17.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_17"
+ },
+ {
+ "index": 103,
+ "pair_id": "I_origin_0_I_table_64_18",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_18.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_18"
+ },
+ {
+ "index": 104,
+ "pair_id": "I_origin_0_I_table_64_19",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_19.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_19"
+ },
+ {
+ "index": 105,
+ "pair_id": "I_origin_0_I_table_64_2",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_2.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_2"
+ },
+ {
+ "index": 106,
+ "pair_id": "I_origin_0_I_table_64_20",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_20.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_20"
+ },
+ {
+ "index": 107,
+ "pair_id": "I_origin_0_I_table_64_21",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_21.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_21"
+ },
+ {
+ "index": 108,
+ "pair_id": "I_origin_0_I_table_64_22",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_22.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_22"
+ },
+ {
+ "index": 109,
+ "pair_id": "I_origin_0_I_table_64_23",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_23.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_23"
+ },
+ {
+ "index": 110,
+ "pair_id": "I_origin_0_I_table_64_24",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_24.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_24"
+ },
+ {
+ "index": 111,
+ "pair_id": "I_origin_0_I_table_64_25",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_25.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_25"
+ },
+ {
+ "index": 112,
+ "pair_id": "I_origin_0_I_table_64_26",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_26.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_26"
+ },
+ {
+ "index": 113,
+ "pair_id": "I_origin_0_I_table_64_27",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_27.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_27"
+ },
+ {
+ "index": 114,
+ "pair_id": "I_origin_0_I_table_64_3",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_3.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_3"
+ },
+ {
+ "index": 115,
+ "pair_id": "I_origin_0_I_table_64_4",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_4.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_4"
+ },
+ {
+ "index": 116,
+ "pair_id": "I_origin_0_I_table_64_5",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_5.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_5"
+ },
+ {
+ "index": 117,
+ "pair_id": "I_origin_0_I_table_64_6",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_6.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_6"
+ },
+ {
+ "index": 118,
+ "pair_id": "I_origin_0_I_table_64_7",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_7.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_7"
+ },
+ {
+ "index": 119,
+ "pair_id": "I_origin_0_I_table_64_8",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_8.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_8"
+ },
+ {
+ "index": 120,
+ "pair_id": "I_origin_0_I_table_64_9",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_64_9.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_64_9"
+ },
+ {
+ "index": 121,
+ "pair_id": "I_origin_0_I_table_65_0",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_65_0.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_65_0"
+ },
+ {
+ "index": 122,
+ "pair_id": "I_origin_0_I_table_65_1",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_65_1.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_65_1"
+ },
+ {
+ "index": 123,
+ "pair_id": "I_origin_0_I_table_65_2",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_65_2.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_65_2"
+ },
+ {
+ "index": 124,
+ "pair_id": "I_origin_0_I_table_66",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_66.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_66"
+ },
+ {
+ "index": 125,
+ "pair_id": "I_origin_0_I_table_67",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_67.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_67"
+ },
+ {
+ "index": 126,
+ "pair_id": "I_origin_0_I_table_68",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_68.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_68"
+ },
+ {
+ "index": 127,
+ "pair_id": "I_origin_0_I_table_69",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_69.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_69"
+ },
+ {
+ "index": 128,
+ "pair_id": "I_origin_0_I_table_7",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_7.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_7"
+ },
+ {
+ "index": 129,
+ "pair_id": "I_origin_0_I_table_70",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_70.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_70"
+ },
+ {
+ "index": 130,
+ "pair_id": "I_origin_0_I_table_71",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_71.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_71"
+ },
+ {
+ "index": 131,
+ "pair_id": "I_origin_0_I_table_72",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_72.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_72"
+ },
+ {
+ "index": 132,
+ "pair_id": "I_origin_0_I_table_73",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_73.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_73"
+ },
+ {
+ "index": 133,
+ "pair_id": "I_origin_0_I_table_74",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_74.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_74"
+ },
+ {
+ "index": 134,
+ "pair_id": "I_origin_0_I_table_75",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_75.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_75"
+ },
+ {
+ "index": 135,
+ "pair_id": "I_origin_0_I_table_76",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_76.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_76"
+ },
+ {
+ "index": 136,
+ "pair_id": "I_origin_0_I_table_77",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_77.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_77"
+ },
+ {
+ "index": 137,
+ "pair_id": "I_origin_0_I_table_8",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_8.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_8"
+ },
+ {
+ "index": 138,
+ "pair_id": "I_origin_0_I_table_9",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_0/I_table_9.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_0",
+ "table_id": "I_table_9"
+ },
+ {
+ "index": 139,
+ "pair_id": "I_origin_1_I_table_78",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_1/I_table_78.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_1",
+ "table_id": "I_table_78"
+ },
+ {
+ "index": 140,
+ "pair_id": "I_origin_2_I_table_79",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_79.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_79"
+ },
+ {
+ "index": 141,
+ "pair_id": "I_origin_2_I_table_80",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_80.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_80"
+ },
+ {
+ "index": 142,
+ "pair_id": "I_origin_2_I_table_81",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_81.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_81"
+ },
+ {
+ "index": 143,
+ "pair_id": "I_origin_2_I_table_82",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_82.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_82"
+ },
+ {
+ "index": 144,
+ "pair_id": "I_origin_2_I_table_83",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_83.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_83"
+ },
+ {
+ "index": 145,
+ "pair_id": "I_origin_2_I_table_84",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_84.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_84"
+ },
+ {
+ "index": 146,
+ "pair_id": "I_origin_2_I_table_85",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_85.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_85"
+ },
+ {
+ "index": 147,
+ "pair_id": "I_origin_2_I_table_86",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_86.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_86"
+ },
+ {
+ "index": 148,
+ "pair_id": "I_origin_2_I_table_87",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_87.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_87"
+ },
+ {
+ "index": 149,
+ "pair_id": "I_origin_2_I_table_88",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_88.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_88"
+ },
+ {
+ "index": 150,
+ "pair_id": "I_origin_2_I_table_89",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_89.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_89"
+ },
+ {
+ "index": 151,
+ "pair_id": "I_origin_2_I_table_90",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_90.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_90"
+ },
+ {
+ "index": 152,
+ "pair_id": "I_origin_2_I_table_91",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_91.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_91"
+ },
+ {
+ "index": 153,
+ "pair_id": "I_origin_2_I_table_92",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_92.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_92"
+ },
+ {
+ "index": 154,
+ "pair_id": "I_origin_2_I_table_93",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_93.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_93"
+ },
+ {
+ "index": 155,
+ "pair_id": "I_origin_2_I_table_94",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_94.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_94"
+ },
+ {
+ "index": 156,
+ "pair_id": "I_origin_2_I_table_95",
+ "image_paths": [
+ "data/Insurance/Table/I_origin_2/I_table_95.png"
+ ],
+ "domain": "Insurance",
+ "origin": "I_origin_2",
+ "table_id": "I_table_95"
+ }
+]
\ No newline at end of file
diff --git a/single_image_json_list/single_table_medical_input.json b/single_image_json_list/single_table_medical_input.json
new file mode 100644
index 0000000..3978487
--- /dev/null
+++ b/single_image_json_list/single_table_medical_input.json
@@ -0,0 +1,1292 @@
+[
+ {
+ "index": 0,
+ "pair_id": "Medical_M_table_0_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_0_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_0_0_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "Medical_M_table_0_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_0_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_0_1_0"
+ },
+ {
+ "index": 2,
+ "pair_id": "Medical_M_table_10_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_0_0"
+ },
+ {
+ "index": 3,
+ "pair_id": "Medical_M_table_10_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_0_1"
+ },
+ {
+ "index": 4,
+ "pair_id": "Medical_M_table_10_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_0_2"
+ },
+ {
+ "index": 5,
+ "pair_id": "Medical_M_table_10_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_1_0"
+ },
+ {
+ "index": 6,
+ "pair_id": "Medical_M_table_10_1_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_1_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_1_1"
+ },
+ {
+ "index": 7,
+ "pair_id": "Medical_M_table_10_1_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_1_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_1_2"
+ },
+ {
+ "index": 8,
+ "pair_id": "Medical_M_table_10_1_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_1_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_1_3"
+ },
+ {
+ "index": 9,
+ "pair_id": "Medical_M_table_10_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_0"
+ },
+ {
+ "index": 10,
+ "pair_id": "Medical_M_table_10_2_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_1"
+ },
+ {
+ "index": 11,
+ "pair_id": "Medical_M_table_10_2_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_2"
+ },
+ {
+ "index": 12,
+ "pair_id": "Medical_M_table_10_2_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_3"
+ },
+ {
+ "index": 13,
+ "pair_id": "Medical_M_table_10_2_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_4"
+ },
+ {
+ "index": 14,
+ "pair_id": "Medical_M_table_10_2_5",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_2_5.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_2_5"
+ },
+ {
+ "index": 15,
+ "pair_id": "Medical_M_table_10_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_3_0"
+ },
+ {
+ "index": 16,
+ "pair_id": "Medical_M_table_10_3_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_3_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_3_1"
+ },
+ {
+ "index": 17,
+ "pair_id": "Medical_M_table_10_3_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_3_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_3_2"
+ },
+ {
+ "index": 18,
+ "pair_id": "Medical_M_table_10_3_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_3_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_3_3"
+ },
+ {
+ "index": 19,
+ "pair_id": "Medical_M_table_10_3_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_3_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_3_4"
+ },
+ {
+ "index": 20,
+ "pair_id": "Medical_M_table_10_4_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_4_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_4_0"
+ },
+ {
+ "index": 21,
+ "pair_id": "Medical_M_table_10_4_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_4_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_4_1"
+ },
+ {
+ "index": 22,
+ "pair_id": "Medical_M_table_10_4_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_4_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_4_2"
+ },
+ {
+ "index": 23,
+ "pair_id": "Medical_M_table_10_4_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_4_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_4_3"
+ },
+ {
+ "index": 24,
+ "pair_id": "Medical_M_table_10_4_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_4_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_4_4"
+ },
+ {
+ "index": 25,
+ "pair_id": "Medical_M_table_10_5_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_10_5_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_10_5_0"
+ },
+ {
+ "index": 26,
+ "pair_id": "Medical_M_table_11_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_11_0_0"
+ },
+ {
+ "index": 27,
+ "pair_id": "Medical_M_table_11_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_11_0_1"
+ },
+ {
+ "index": 28,
+ "pair_id": "Medical_M_table_11_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_11_0_2"
+ },
+ {
+ "index": 29,
+ "pair_id": "Medical_M_table_11_0_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_11_0_3"
+ },
+ {
+ "index": 30,
+ "pair_id": "Medical_M_table_11_0_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_11_0_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_11_0_4"
+ },
+ {
+ "index": 31,
+ "pair_id": "Medical_M_table_12_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_12_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_12_0_0"
+ },
+ {
+ "index": 32,
+ "pair_id": "Medical_M_table_13_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_0_0"
+ },
+ {
+ "index": 33,
+ "pair_id": "Medical_M_table_13_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_0_1"
+ },
+ {
+ "index": 34,
+ "pair_id": "Medical_M_table_13_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_0_2"
+ },
+ {
+ "index": 35,
+ "pair_id": "Medical_M_table_13_0_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_0_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_0_3"
+ },
+ {
+ "index": 36,
+ "pair_id": "Medical_M_table_13_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_0"
+ },
+ {
+ "index": 37,
+ "pair_id": "Medical_M_table_13_1_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_1"
+ },
+ {
+ "index": 38,
+ "pair_id": "Medical_M_table_13_1_10",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_10.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_10"
+ },
+ {
+ "index": 39,
+ "pair_id": "Medical_M_table_13_1_11",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_11.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_11"
+ },
+ {
+ "index": 40,
+ "pair_id": "Medical_M_table_13_1_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_2"
+ },
+ {
+ "index": 41,
+ "pair_id": "Medical_M_table_13_1_6",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_6.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_6"
+ },
+ {
+ "index": 42,
+ "pair_id": "Medical_M_table_13_1_8",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_1_8.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_1_8"
+ },
+ {
+ "index": 43,
+ "pair_id": "Medical_M_table_13_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_2_0"
+ },
+ {
+ "index": 44,
+ "pair_id": "Medical_M_table_13_2_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_13_2_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_13_2_1"
+ },
+ {
+ "index": 45,
+ "pair_id": "Medical_M_table_14_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_0_0"
+ },
+ {
+ "index": 46,
+ "pair_id": "Medical_M_table_14_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_0_1"
+ },
+ {
+ "index": 47,
+ "pair_id": "Medical_M_table_14_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_1_0"
+ },
+ {
+ "index": 48,
+ "pair_id": "Medical_M_table_14_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_2_0"
+ },
+ {
+ "index": 49,
+ "pair_id": "Medical_M_table_14_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_3_0"
+ },
+ {
+ "index": 50,
+ "pair_id": "Medical_M_table_14_3_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_3_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_3_1"
+ },
+ {
+ "index": 51,
+ "pair_id": "Medical_M_table_14_3_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_3_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_3_2"
+ },
+ {
+ "index": 52,
+ "pair_id": "Medical_M_table_14_3_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_3_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_3_3"
+ },
+ {
+ "index": 53,
+ "pair_id": "Medical_M_table_14_3_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_14_3_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_14_3_4"
+ },
+ {
+ "index": 54,
+ "pair_id": "Medical_M_table_15_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_0_0"
+ },
+ {
+ "index": 55,
+ "pair_id": "Medical_M_table_15_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_0_1"
+ },
+ {
+ "index": 56,
+ "pair_id": "Medical_M_table_15_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_0_2"
+ },
+ {
+ "index": 57,
+ "pair_id": "Medical_M_table_15_10_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_10_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_10_0"
+ },
+ {
+ "index": 58,
+ "pair_id": "Medical_M_table_15_10_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_10_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_10_1"
+ },
+ {
+ "index": 59,
+ "pair_id": "Medical_M_table_15_11_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_11_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_11_0"
+ },
+ {
+ "index": 60,
+ "pair_id": "Medical_M_table_15_12_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_12_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_12_0"
+ },
+ {
+ "index": 61,
+ "pair_id": "Medical_M_table_15_13_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_13_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_13_0"
+ },
+ {
+ "index": 62,
+ "pair_id": "Medical_M_table_15_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_1_0"
+ },
+ {
+ "index": 63,
+ "pair_id": "Medical_M_table_15_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_2_0"
+ },
+ {
+ "index": 64,
+ "pair_id": "Medical_M_table_15_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_3_0"
+ },
+ {
+ "index": 65,
+ "pair_id": "Medical_M_table_15_4_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_4_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_4_0"
+ },
+ {
+ "index": 66,
+ "pair_id": "Medical_M_table_15_5_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_5_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_5_0"
+ },
+ {
+ "index": 67,
+ "pair_id": "Medical_M_table_15_5_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_5_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_5_1"
+ },
+ {
+ "index": 68,
+ "pair_id": "Medical_M_table_15_5_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_5_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_5_2"
+ },
+ {
+ "index": 69,
+ "pair_id": "Medical_M_table_15_6_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_6_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_6_0"
+ },
+ {
+ "index": 70,
+ "pair_id": "Medical_M_table_15_7_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_7_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_7_0"
+ },
+ {
+ "index": 71,
+ "pair_id": "Medical_M_table_15_8_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_8_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_8_0"
+ },
+ {
+ "index": 72,
+ "pair_id": "Medical_M_table_15_9_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_15_9_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_15_9_0"
+ },
+ {
+ "index": 73,
+ "pair_id": "Medical_M_table_16_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_16_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_16_0_0"
+ },
+ {
+ "index": 74,
+ "pair_id": "Medical_M_table_16_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_16_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_16_0_1"
+ },
+ {
+ "index": 75,
+ "pair_id": "Medical_M_table_16_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_16_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_16_1_0"
+ },
+ {
+ "index": 76,
+ "pair_id": "Medical_M_table_16_1_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_16_1_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_16_1_1"
+ },
+ {
+ "index": 77,
+ "pair_id": "Medical_M_table_1_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_1_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_1_0_0"
+ },
+ {
+ "index": 78,
+ "pair_id": "Medical_M_table_2_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_0_0"
+ },
+ {
+ "index": 79,
+ "pair_id": "Medical_M_table_2_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_1_0"
+ },
+ {
+ "index": 80,
+ "pair_id": "Medical_M_table_2_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_2_0"
+ },
+ {
+ "index": 81,
+ "pair_id": "Medical_M_table_2_2_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_2_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_2_1"
+ },
+ {
+ "index": 82,
+ "pair_id": "Medical_M_table_2_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_3_0"
+ },
+ {
+ "index": 83,
+ "pair_id": "Medical_M_table_2_3_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_3_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_3_1"
+ },
+ {
+ "index": 84,
+ "pair_id": "Medical_M_table_2_4_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_4_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_4_0"
+ },
+ {
+ "index": 85,
+ "pair_id": "Medical_M_table_2_4_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_4_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_4_1"
+ },
+ {
+ "index": 86,
+ "pair_id": "Medical_M_table_2_5_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_5_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_5_0"
+ },
+ {
+ "index": 87,
+ "pair_id": "Medical_M_table_2_6_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_0"
+ },
+ {
+ "index": 88,
+ "pair_id": "Medical_M_table_2_6_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_1"
+ },
+ {
+ "index": 89,
+ "pair_id": "Medical_M_table_2_6_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_2"
+ },
+ {
+ "index": 90,
+ "pair_id": "Medical_M_table_2_6_3",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_3.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_3"
+ },
+ {
+ "index": 91,
+ "pair_id": "Medical_M_table_2_6_4",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_4.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_4"
+ },
+ {
+ "index": 92,
+ "pair_id": "Medical_M_table_2_6_5",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_5.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_5"
+ },
+ {
+ "index": 93,
+ "pair_id": "Medical_M_table_2_6_6",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_6.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_6"
+ },
+ {
+ "index": 94,
+ "pair_id": "Medical_M_table_2_6_7",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_6_7.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_6_7"
+ },
+ {
+ "index": 95,
+ "pair_id": "Medical_M_table_2_7_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_2_7_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_2_7_0"
+ },
+ {
+ "index": 96,
+ "pair_id": "Medical_M_table_3_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_0_0"
+ },
+ {
+ "index": 97,
+ "pair_id": "Medical_M_table_3_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_0_1"
+ },
+ {
+ "index": 98,
+ "pair_id": "Medical_M_table_3_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_0_2"
+ },
+ {
+ "index": 99,
+ "pair_id": "Medical_M_table_3_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_1_0"
+ },
+ {
+ "index": 100,
+ "pair_id": "Medical_M_table_3_1_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_1_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_1_1"
+ },
+ {
+ "index": 101,
+ "pair_id": "Medical_M_table_3_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_2_0"
+ },
+ {
+ "index": 102,
+ "pair_id": "Medical_M_table_3_2_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_2_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_2_1"
+ },
+ {
+ "index": 103,
+ "pair_id": "Medical_M_table_3_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_3_0"
+ },
+ {
+ "index": 104,
+ "pair_id": "Medical_M_table_3_3_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_3_3_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_3_3_1"
+ },
+ {
+ "index": 105,
+ "pair_id": "Medical_M_table_4_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_4_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_4_0_0"
+ },
+ {
+ "index": 106,
+ "pair_id": "Medical_M_table_4_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_4_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_4_0_1"
+ },
+ {
+ "index": 107,
+ "pair_id": "Medical_M_table_4_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_4_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_4_1_0"
+ },
+ {
+ "index": 108,
+ "pair_id": "Medical_M_table_5_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_5_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_5_0_0"
+ },
+ {
+ "index": 109,
+ "pair_id": "Medical_M_table_6_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_6_0_0"
+ },
+ {
+ "index": 110,
+ "pair_id": "Medical_M_table_6_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_6_1_0"
+ },
+ {
+ "index": 111,
+ "pair_id": "Medical_M_table_6_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_6_2_0"
+ },
+ {
+ "index": 112,
+ "pair_id": "Medical_M_table_6_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_6_3_0"
+ },
+ {
+ "index": 113,
+ "pair_id": "Medical_M_table_6_3_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_6_3_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_6_3_1"
+ },
+ {
+ "index": 114,
+ "pair_id": "Medical_M_table_8_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_0_0"
+ },
+ {
+ "index": 115,
+ "pair_id": "Medical_M_table_8_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_0_1"
+ },
+ {
+ "index": 116,
+ "pair_id": "Medical_M_table_8_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_0_2"
+ },
+ {
+ "index": 117,
+ "pair_id": "Medical_M_table_8_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_1_0"
+ },
+ {
+ "index": 118,
+ "pair_id": "Medical_M_table_8_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_2_0"
+ },
+ {
+ "index": 119,
+ "pair_id": "Medical_M_table_8_3_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_8_3_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_8_3_0"
+ },
+ {
+ "index": 120,
+ "pair_id": "Medical_M_table_9_0_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_0_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_0_0"
+ },
+ {
+ "index": 121,
+ "pair_id": "Medical_M_table_9_0_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_0_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_0_1"
+ },
+ {
+ "index": 122,
+ "pair_id": "Medical_M_table_9_0_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_0_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_0_2"
+ },
+ {
+ "index": 123,
+ "pair_id": "Medical_M_table_9_1_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_1_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_1_0"
+ },
+ {
+ "index": 124,
+ "pair_id": "Medical_M_table_9_1_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_1_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_1_1"
+ },
+ {
+ "index": 125,
+ "pair_id": "Medical_M_table_9_1_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_1_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_1_2"
+ },
+ {
+ "index": 126,
+ "pair_id": "Medical_M_table_9_2_0",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_2_0.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_2_0"
+ },
+ {
+ "index": 127,
+ "pair_id": "Medical_M_table_9_2_1",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_2_1.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_2_1"
+ },
+ {
+ "index": 128,
+ "pair_id": "Medical_M_table_9_2_2",
+ "image_paths": [
+ "data/Medical/Table/M_table_9_2_2.png"
+ ],
+ "domain": "Medical",
+ "origin": "Medical",
+ "table_id": "M_table_9_2_2"
+ }
+]
\ No newline at end of file
diff --git a/single_image_json_list/single_table_public_input.json b/single_image_json_list/single_table_public_input.json
new file mode 100644
index 0000000..b7d33d5
--- /dev/null
+++ b/single_image_json_list/single_table_public_input.json
@@ -0,0 +1,2492 @@
+[
+ {
+ "index": 0,
+ "pair_id": "P_origin_0_P_origin_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_0"
+ },
+ {
+ "index": 1,
+ "pair_id": "P_origin_0_P_origin_0_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_0"
+ },
+ {
+ "index": 2,
+ "pair_id": "P_origin_0_P_origin_0_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_1"
+ },
+ {
+ "index": 3,
+ "pair_id": "P_origin_0_P_origin_0_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_2"
+ },
+ {
+ "index": 4,
+ "pair_id": "P_origin_0_P_origin_0_1_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_3"
+ },
+ {
+ "index": 5,
+ "pair_id": "P_origin_0_P_origin_0_1_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_4"
+ },
+ {
+ "index": 6,
+ "pair_id": "P_origin_0_P_origin_0_1_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_5"
+ },
+ {
+ "index": 7,
+ "pair_id": "P_origin_0_P_origin_0_1_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_1_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_1_6"
+ },
+ {
+ "index": 8,
+ "pair_id": "P_origin_0_P_origin_0_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_2_0"
+ },
+ {
+ "index": 9,
+ "pair_id": "P_origin_0_P_origin_0_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_2_1"
+ },
+ {
+ "index": 10,
+ "pair_id": "P_origin_0_P_origin_0_2_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_0/P_origin_0_2_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_0",
+ "table_id": "P_origin_0_2_2"
+ },
+ {
+ "index": 11,
+ "pair_id": "P_origin_1_P_origin_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_0"
+ },
+ {
+ "index": 12,
+ "pair_id": "P_origin_1_P_origin_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_1"
+ },
+ {
+ "index": 13,
+ "pair_id": "P_origin_1_P_origin_1_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_10"
+ },
+ {
+ "index": 14,
+ "pair_id": "P_origin_1_P_origin_1_11_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_11_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_11_0"
+ },
+ {
+ "index": 15,
+ "pair_id": "P_origin_1_P_origin_1_11_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_11_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_11_1"
+ },
+ {
+ "index": 16,
+ "pair_id": "P_origin_1_P_origin_1_11_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_11_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_11_2"
+ },
+ {
+ "index": 17,
+ "pair_id": "P_origin_1_P_origin_1_12_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_12_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_12_0"
+ },
+ {
+ "index": 18,
+ "pair_id": "P_origin_1_P_origin_1_12_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_12_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_12_1"
+ },
+ {
+ "index": 19,
+ "pair_id": "P_origin_1_P_origin_1_12_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_12_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_12_2"
+ },
+ {
+ "index": 20,
+ "pair_id": "P_origin_1_P_origin_1_13_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_13_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_13_0"
+ },
+ {
+ "index": 21,
+ "pair_id": "P_origin_1_P_origin_1_13_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_13_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_13_1"
+ },
+ {
+ "index": 22,
+ "pair_id": "P_origin_1_P_origin_1_13_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_13_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_13_2"
+ },
+ {
+ "index": 23,
+ "pair_id": "P_origin_1_P_origin_1_14_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_14_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_14_0"
+ },
+ {
+ "index": 24,
+ "pair_id": "P_origin_1_P_origin_1_14_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_14_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_14_1"
+ },
+ {
+ "index": 25,
+ "pair_id": "P_origin_1_P_origin_1_14_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_14_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_14_2"
+ },
+ {
+ "index": 26,
+ "pair_id": "P_origin_1_P_origin_1_15_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_15_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_15_0"
+ },
+ {
+ "index": 27,
+ "pair_id": "P_origin_1_P_origin_1_15_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_15_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_15_1"
+ },
+ {
+ "index": 28,
+ "pair_id": "P_origin_1_P_origin_1_15_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_15_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_15_2"
+ },
+ {
+ "index": 29,
+ "pair_id": "P_origin_1_P_origin_1_16_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_16_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_16_0"
+ },
+ {
+ "index": 30,
+ "pair_id": "P_origin_1_P_origin_1_16_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_16_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_16_1"
+ },
+ {
+ "index": 31,
+ "pair_id": "P_origin_1_P_origin_1_16_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_16_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_16_2"
+ },
+ {
+ "index": 32,
+ "pair_id": "P_origin_1_P_origin_1_17_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_17_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_17_0"
+ },
+ {
+ "index": 33,
+ "pair_id": "P_origin_1_P_origin_1_17_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_17_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_17_1"
+ },
+ {
+ "index": 34,
+ "pair_id": "P_origin_1_P_origin_1_17_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_17_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_17_2"
+ },
+ {
+ "index": 35,
+ "pair_id": "P_origin_1_P_origin_1_18_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_18_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_18_0"
+ },
+ {
+ "index": 36,
+ "pair_id": "P_origin_1_P_origin_1_18_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_18_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_18_1"
+ },
+ {
+ "index": 37,
+ "pair_id": "P_origin_1_P_origin_1_19_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_19_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_19_0"
+ },
+ {
+ "index": 38,
+ "pair_id": "P_origin_1_P_origin_1_19_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_19_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_19_1"
+ },
+ {
+ "index": 39,
+ "pair_id": "P_origin_1_P_origin_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_2"
+ },
+ {
+ "index": 40,
+ "pair_id": "P_origin_1_P_origin_1_20_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_20_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_20_0"
+ },
+ {
+ "index": 41,
+ "pair_id": "P_origin_1_P_origin_1_20_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_20_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_20_1"
+ },
+ {
+ "index": 42,
+ "pair_id": "P_origin_1_P_origin_1_20_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_20_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_20_2"
+ },
+ {
+ "index": 43,
+ "pair_id": "P_origin_1_P_origin_1_21_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_21_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_21_0"
+ },
+ {
+ "index": 44,
+ "pair_id": "P_origin_1_P_origin_1_21_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_21_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_21_1"
+ },
+ {
+ "index": 45,
+ "pair_id": "P_origin_1_P_origin_1_21_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_21_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_21_2"
+ },
+ {
+ "index": 46,
+ "pair_id": "P_origin_1_P_origin_1_22_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_22_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_22_0"
+ },
+ {
+ "index": 47,
+ "pair_id": "P_origin_1_P_origin_1_22_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_22_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_22_1"
+ },
+ {
+ "index": 48,
+ "pair_id": "P_origin_1_P_origin_1_22_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_22_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_22_2"
+ },
+ {
+ "index": 49,
+ "pair_id": "P_origin_1_P_origin_1_23_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_23_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_23_0"
+ },
+ {
+ "index": 50,
+ "pair_id": "P_origin_1_P_origin_1_23_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_23_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_23_1"
+ },
+ {
+ "index": 51,
+ "pair_id": "P_origin_1_P_origin_1_23_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_23_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_23_2"
+ },
+ {
+ "index": 52,
+ "pair_id": "P_origin_1_P_origin_1_24",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_24.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_24"
+ },
+ {
+ "index": 53,
+ "pair_id": "P_origin_1_P_origin_1_25",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_25.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_25"
+ },
+ {
+ "index": 54,
+ "pair_id": "P_origin_1_P_origin_1_26",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_26.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_26"
+ },
+ {
+ "index": 55,
+ "pair_id": "P_origin_1_P_origin_1_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_3"
+ },
+ {
+ "index": 56,
+ "pair_id": "P_origin_1_P_origin_1_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_4"
+ },
+ {
+ "index": 57,
+ "pair_id": "P_origin_1_P_origin_1_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_5"
+ },
+ {
+ "index": 58,
+ "pair_id": "P_origin_1_P_origin_1_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_6"
+ },
+ {
+ "index": 59,
+ "pair_id": "P_origin_1_P_origin_1_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_7"
+ },
+ {
+ "index": 60,
+ "pair_id": "P_origin_1_P_origin_1_8_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_8_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_8_0"
+ },
+ {
+ "index": 61,
+ "pair_id": "P_origin_1_P_origin_1_8_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_8_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_8_1"
+ },
+ {
+ "index": 62,
+ "pair_id": "P_origin_1_P_origin_1_8_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_8_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_8_2"
+ },
+ {
+ "index": 63,
+ "pair_id": "P_origin_1_P_origin_1_9_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_9_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_9_0"
+ },
+ {
+ "index": 64,
+ "pair_id": "P_origin_1_P_origin_1_9_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_1/P_origin_1_9_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_1",
+ "table_id": "P_origin_1_9_1"
+ },
+ {
+ "index": 65,
+ "pair_id": "P_origin_10_P_origin_10_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_10/P_origin_10_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_10",
+ "table_id": "P_origin_10_0_0"
+ },
+ {
+ "index": 66,
+ "pair_id": "P_origin_10_P_origin_10_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_10/P_origin_10_0_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_10",
+ "table_id": "P_origin_10_0_1"
+ },
+ {
+ "index": 67,
+ "pair_id": "P_origin_10_P_origin_10_0_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_10/P_origin_10_0_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_10",
+ "table_id": "P_origin_10_0_2"
+ },
+ {
+ "index": 68,
+ "pair_id": "P_origin_10_P_origin_10_0_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_10/P_origin_10_0_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_10",
+ "table_id": "P_origin_10_0_3"
+ },
+ {
+ "index": 69,
+ "pair_id": "P_origin_10_P_origin_10_0_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_10/P_origin_10_0_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_10",
+ "table_id": "P_origin_10_0_4"
+ },
+ {
+ "index": 70,
+ "pair_id": "P_origin_11_P_origin_11_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_0"
+ },
+ {
+ "index": 71,
+ "pair_id": "P_origin_11_P_origin_11_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_10"
+ },
+ {
+ "index": 72,
+ "pair_id": "P_origin_11_P_origin_11_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_11.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_11"
+ },
+ {
+ "index": 73,
+ "pair_id": "P_origin_11_P_origin_11_12",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_12.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_12"
+ },
+ {
+ "index": 74,
+ "pair_id": "P_origin_11_P_origin_11_13",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_13.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_13"
+ },
+ {
+ "index": 75,
+ "pair_id": "P_origin_11_P_origin_11_14",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_14.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_14"
+ },
+ {
+ "index": 76,
+ "pair_id": "P_origin_11_P_origin_11_15",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_15.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_15"
+ },
+ {
+ "index": 77,
+ "pair_id": "P_origin_11_P_origin_11_16",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_16.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_16"
+ },
+ {
+ "index": 78,
+ "pair_id": "P_origin_11_P_origin_11_17",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_17.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_17"
+ },
+ {
+ "index": 79,
+ "pair_id": "P_origin_11_P_origin_11_18",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_18.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_18"
+ },
+ {
+ "index": 80,
+ "pair_id": "P_origin_11_P_origin_11_19",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_19.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_19"
+ },
+ {
+ "index": 81,
+ "pair_id": "P_origin_11_P_origin_11_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_1_0"
+ },
+ {
+ "index": 82,
+ "pair_id": "P_origin_11_P_origin_11_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_1_1"
+ },
+ {
+ "index": 83,
+ "pair_id": "P_origin_11_P_origin_11_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_1_2"
+ },
+ {
+ "index": 84,
+ "pair_id": "P_origin_11_P_origin_11_20",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_20.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_20"
+ },
+ {
+ "index": 85,
+ "pair_id": "P_origin_11_P_origin_11_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_2_0"
+ },
+ {
+ "index": 86,
+ "pair_id": "P_origin_11_P_origin_11_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_2_1"
+ },
+ {
+ "index": 87,
+ "pair_id": "P_origin_11_P_origin_11_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_3"
+ },
+ {
+ "index": 88,
+ "pair_id": "P_origin_11_P_origin_11_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_4"
+ },
+ {
+ "index": 89,
+ "pair_id": "P_origin_11_P_origin_11_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_5"
+ },
+ {
+ "index": 90,
+ "pair_id": "P_origin_11_P_origin_11_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_6"
+ },
+ {
+ "index": 91,
+ "pair_id": "P_origin_11_P_origin_11_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_7"
+ },
+ {
+ "index": 92,
+ "pair_id": "P_origin_11_P_origin_11_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_8"
+ },
+ {
+ "index": 93,
+ "pair_id": "P_origin_11_P_origin_11_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_11/P_origin_11_9.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_11",
+ "table_id": "P_origin_11_9"
+ },
+ {
+ "index": 94,
+ "pair_id": "P_origin_2_P_origin_2_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_0_0"
+ },
+ {
+ "index": 95,
+ "pair_id": "P_origin_2_P_origin_2_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_0_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_0_1"
+ },
+ {
+ "index": 96,
+ "pair_id": "P_origin_2_P_origin_2_0_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_0_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_0_2"
+ },
+ {
+ "index": 97,
+ "pair_id": "P_origin_2_P_origin_2_0_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_0_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_0_3"
+ },
+ {
+ "index": 98,
+ "pair_id": "P_origin_2_P_origin_2_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_1_0"
+ },
+ {
+ "index": 99,
+ "pair_id": "P_origin_2_P_origin_2_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_1_1"
+ },
+ {
+ "index": 100,
+ "pair_id": "P_origin_2_P_origin_2_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_1_2"
+ },
+ {
+ "index": 101,
+ "pair_id": "P_origin_2_P_origin_2_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_2_0"
+ },
+ {
+ "index": 102,
+ "pair_id": "P_origin_2_P_origin_2_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_2_1"
+ },
+ {
+ "index": 103,
+ "pair_id": "P_origin_2_P_origin_2_2_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_2/P_origin_2_2_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_2",
+ "table_id": "P_origin_2_2_2"
+ },
+ {
+ "index": 104,
+ "pair_id": "P_origin_3_P_origin_3_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_3/P_origin_3_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_3",
+ "table_id": "P_origin_3_0"
+ },
+ {
+ "index": 105,
+ "pair_id": "P_origin_3_P_origin_3_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_3/P_origin_3_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_3",
+ "table_id": "P_origin_3_1"
+ },
+ {
+ "index": 106,
+ "pair_id": "P_origin_3_P_origin_3_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_3/P_origin_3_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_3",
+ "table_id": "P_origin_3_2_0"
+ },
+ {
+ "index": 107,
+ "pair_id": "P_origin_3_P_origin_3_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_3/P_origin_3_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_3",
+ "table_id": "P_origin_3_2_1"
+ },
+ {
+ "index": 108,
+ "pair_id": "P_origin_4_P_origin_4_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_0"
+ },
+ {
+ "index": 109,
+ "pair_id": "P_origin_4_P_origin_4_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_1"
+ },
+ {
+ "index": 110,
+ "pair_id": "P_origin_4_P_origin_4_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_10"
+ },
+ {
+ "index": 111,
+ "pair_id": "P_origin_4_P_origin_4_11_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_11_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_11_0"
+ },
+ {
+ "index": 112,
+ "pair_id": "P_origin_4_P_origin_4_11_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_11_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_11_1"
+ },
+ {
+ "index": 113,
+ "pair_id": "P_origin_4_P_origin_4_12",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_12.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_12"
+ },
+ {
+ "index": 114,
+ "pair_id": "P_origin_4_P_origin_4_13",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_13.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_13"
+ },
+ {
+ "index": 115,
+ "pair_id": "P_origin_4_P_origin_4_14",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_14.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_14"
+ },
+ {
+ "index": 116,
+ "pair_id": "P_origin_4_P_origin_4_15",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_15.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_15"
+ },
+ {
+ "index": 117,
+ "pair_id": "P_origin_4_P_origin_4_16",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_16.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_16"
+ },
+ {
+ "index": 118,
+ "pair_id": "P_origin_4_P_origin_4_17",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_17.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_17"
+ },
+ {
+ "index": 119,
+ "pair_id": "P_origin_4_P_origin_4_18",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_18.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_18"
+ },
+ {
+ "index": 120,
+ "pair_id": "P_origin_4_P_origin_4_19",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_19.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_19"
+ },
+ {
+ "index": 121,
+ "pair_id": "P_origin_4_P_origin_4_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_2"
+ },
+ {
+ "index": 122,
+ "pair_id": "P_origin_4_P_origin_4_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_3"
+ },
+ {
+ "index": 123,
+ "pair_id": "P_origin_4_P_origin_4_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_4"
+ },
+ {
+ "index": 124,
+ "pair_id": "P_origin_4_P_origin_4_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_5"
+ },
+ {
+ "index": 125,
+ "pair_id": "P_origin_4_P_origin_4_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_6"
+ },
+ {
+ "index": 126,
+ "pair_id": "P_origin_4_P_origin_4_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_7"
+ },
+ {
+ "index": 127,
+ "pair_id": "P_origin_4_P_origin_4_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_8"
+ },
+ {
+ "index": 128,
+ "pair_id": "P_origin_4_P_origin_4_9_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_9_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_9_0"
+ },
+ {
+ "index": 129,
+ "pair_id": "P_origin_4_P_origin_4_9_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_4/P_origin_4_9_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_4",
+ "table_id": "P_origin_4_9_1"
+ },
+ {
+ "index": 130,
+ "pair_id": "P_origin_5_P_origin_5_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_0_0"
+ },
+ {
+ "index": 131,
+ "pair_id": "P_origin_5_P_origin_5_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_0_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_0_1"
+ },
+ {
+ "index": 132,
+ "pair_id": "P_origin_5_P_origin_5_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_10"
+ },
+ {
+ "index": 133,
+ "pair_id": "P_origin_5_P_origin_5_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_11.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_11"
+ },
+ {
+ "index": 134,
+ "pair_id": "P_origin_5_P_origin_5_12",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_12.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_12"
+ },
+ {
+ "index": 135,
+ "pair_id": "P_origin_5_P_origin_5_13",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_13.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_13"
+ },
+ {
+ "index": 136,
+ "pair_id": "P_origin_5_P_origin_5_14",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_14.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_14"
+ },
+ {
+ "index": 137,
+ "pair_id": "P_origin_5_P_origin_5_16",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_16.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_16"
+ },
+ {
+ "index": 138,
+ "pair_id": "P_origin_5_P_origin_5_17_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_17_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_17_0"
+ },
+ {
+ "index": 139,
+ "pair_id": "P_origin_5_P_origin_5_17_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_17_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_17_1"
+ },
+ {
+ "index": 140,
+ "pair_id": "P_origin_5_P_origin_5_18",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_18.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_18"
+ },
+ {
+ "index": 141,
+ "pair_id": "P_origin_5_P_origin_5_19",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_19.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_19"
+ },
+ {
+ "index": 142,
+ "pair_id": "P_origin_5_P_origin_5_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_1_0"
+ },
+ {
+ "index": 143,
+ "pair_id": "P_origin_5_P_origin_5_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_1_1"
+ },
+ {
+ "index": 144,
+ "pair_id": "P_origin_5_P_origin_5_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_1_2"
+ },
+ {
+ "index": 145,
+ "pair_id": "P_origin_5_P_origin_5_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_2"
+ },
+ {
+ "index": 146,
+ "pair_id": "P_origin_5_P_origin_5_20",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_20.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_20"
+ },
+ {
+ "index": 147,
+ "pair_id": "P_origin_5_P_origin_5_21",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_21.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_21"
+ },
+ {
+ "index": 148,
+ "pair_id": "P_origin_5_P_origin_5_22",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_22.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_22"
+ },
+ {
+ "index": 149,
+ "pair_id": "P_origin_5_P_origin_5_23",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_23.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_23"
+ },
+ {
+ "index": 150,
+ "pair_id": "P_origin_5_P_origin_5_24",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_24.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_24"
+ },
+ {
+ "index": 151,
+ "pair_id": "P_origin_5_P_origin_5_25",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_25.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_25"
+ },
+ {
+ "index": 152,
+ "pair_id": "P_origin_5_P_origin_5_26_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_26_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_26_0"
+ },
+ {
+ "index": 153,
+ "pair_id": "P_origin_5_P_origin_5_26_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_26_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_26_1"
+ },
+ {
+ "index": 154,
+ "pair_id": "P_origin_5_P_origin_5_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_3"
+ },
+ {
+ "index": 155,
+ "pair_id": "P_origin_5_P_origin_5_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_4"
+ },
+ {
+ "index": 156,
+ "pair_id": "P_origin_5_P_origin_5_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_5"
+ },
+ {
+ "index": 157,
+ "pair_id": "P_origin_5_P_origin_5_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_6"
+ },
+ {
+ "index": 158,
+ "pair_id": "P_origin_5_P_origin_5_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_7"
+ },
+ {
+ "index": 159,
+ "pair_id": "P_origin_5_P_origin_5_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_8"
+ },
+ {
+ "index": 160,
+ "pair_id": "P_origin_5_P_origin_5_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_5/P_origin_5_9.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_5",
+ "table_id": "P_origin_5_9"
+ },
+ {
+ "index": 161,
+ "pair_id": "P_origin_6_P_origin_6_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_0"
+ },
+ {
+ "index": 162,
+ "pair_id": "P_origin_6_P_origin_6_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_1"
+ },
+ {
+ "index": 163,
+ "pair_id": "P_origin_6_P_origin_6_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_10"
+ },
+ {
+ "index": 164,
+ "pair_id": "P_origin_6_P_origin_6_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_11.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_11"
+ },
+ {
+ "index": 165,
+ "pair_id": "P_origin_6_P_origin_6_12_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_12_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_12_0"
+ },
+ {
+ "index": 166,
+ "pair_id": "P_origin_6_P_origin_6_12_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_12_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_12_1"
+ },
+ {
+ "index": 167,
+ "pair_id": "P_origin_6_P_origin_6_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_2"
+ },
+ {
+ "index": 168,
+ "pair_id": "P_origin_6_P_origin_6_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_3"
+ },
+ {
+ "index": 169,
+ "pair_id": "P_origin_6_P_origin_6_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_4"
+ },
+ {
+ "index": 170,
+ "pair_id": "P_origin_6_P_origin_6_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_5"
+ },
+ {
+ "index": 171,
+ "pair_id": "P_origin_6_P_origin_6_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_6"
+ },
+ {
+ "index": 172,
+ "pair_id": "P_origin_6_P_origin_6_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_7"
+ },
+ {
+ "index": 173,
+ "pair_id": "P_origin_6_P_origin_6_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_8"
+ },
+ {
+ "index": 174,
+ "pair_id": "P_origin_6_P_origin_6_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_6/P_origin_6_9.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_6",
+ "table_id": "P_origin_6_9"
+ },
+ {
+ "index": 175,
+ "pair_id": "P_origin_7_P_origin_7_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_0_0"
+ },
+ {
+ "index": 176,
+ "pair_id": "P_origin_7_P_origin_7_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_0_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_0_1"
+ },
+ {
+ "index": 177,
+ "pair_id": "P_origin_7_P_origin_7_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_1_0"
+ },
+ {
+ "index": 178,
+ "pair_id": "P_origin_7_P_origin_7_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_1_1"
+ },
+ {
+ "index": 179,
+ "pair_id": "P_origin_7_P_origin_7_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_2_0"
+ },
+ {
+ "index": 180,
+ "pair_id": "P_origin_7_P_origin_7_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_2_1"
+ },
+ {
+ "index": 181,
+ "pair_id": "P_origin_7_P_origin_7_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_3"
+ },
+ {
+ "index": 182,
+ "pair_id": "P_origin_7_P_origin_7_4_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_4_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_4_0"
+ },
+ {
+ "index": 183,
+ "pair_id": "P_origin_7_P_origin_7_4_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_4_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_4_1"
+ },
+ {
+ "index": 184,
+ "pair_id": "P_origin_7_P_origin_7_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_5"
+ },
+ {
+ "index": 185,
+ "pair_id": "P_origin_7_P_origin_7_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_6"
+ },
+ {
+ "index": 186,
+ "pair_id": "P_origin_7_P_origin_7_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_7"
+ },
+ {
+ "index": 187,
+ "pair_id": "P_origin_7_P_origin_7_8_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_8_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_8_0"
+ },
+ {
+ "index": 188,
+ "pair_id": "P_origin_7_P_origin_7_8_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_8_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_8_1"
+ },
+ {
+ "index": 189,
+ "pair_id": "P_origin_7_P_origin_7_8_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_7/P_origin_7_8_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_7",
+ "table_id": "P_origin_7_8_2"
+ },
+ {
+ "index": 190,
+ "pair_id": "P_origin_8_P_origin_8_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_0"
+ },
+ {
+ "index": 191,
+ "pair_id": "P_origin_8_P_origin_8_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_1"
+ },
+ {
+ "index": 192,
+ "pair_id": "P_origin_8_P_origin_8_10_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_10_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_10_0"
+ },
+ {
+ "index": 193,
+ "pair_id": "P_origin_8_P_origin_8_10_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_10_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_10_1"
+ },
+ {
+ "index": 194,
+ "pair_id": "P_origin_8_P_origin_8_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_11.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_11"
+ },
+ {
+ "index": 195,
+ "pair_id": "P_origin_8_P_origin_8_12_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_12_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_12_0"
+ },
+ {
+ "index": 196,
+ "pair_id": "P_origin_8_P_origin_8_12_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_12_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_12_1"
+ },
+ {
+ "index": 197,
+ "pair_id": "P_origin_8_P_origin_8_13",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_13.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_13"
+ },
+ {
+ "index": 198,
+ "pair_id": "P_origin_8_P_origin_8_14_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_0"
+ },
+ {
+ "index": 199,
+ "pair_id": "P_origin_8_P_origin_8_14_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_1"
+ },
+ {
+ "index": 200,
+ "pair_id": "P_origin_8_P_origin_8_14_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_2"
+ },
+ {
+ "index": 201,
+ "pair_id": "P_origin_8_P_origin_8_14_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_3"
+ },
+ {
+ "index": 202,
+ "pair_id": "P_origin_8_P_origin_8_14_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_4"
+ },
+ {
+ "index": 203,
+ "pair_id": "P_origin_8_P_origin_8_14_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_5"
+ },
+ {
+ "index": 204,
+ "pair_id": "P_origin_8_P_origin_8_14_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_6"
+ },
+ {
+ "index": 205,
+ "pair_id": "P_origin_8_P_origin_8_14_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_14_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_14_7"
+ },
+ {
+ "index": 206,
+ "pair_id": "P_origin_8_P_origin_8_15",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_15.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_15"
+ },
+ {
+ "index": 207,
+ "pair_id": "P_origin_8_P_origin_8_16",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_16.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_16"
+ },
+ {
+ "index": 208,
+ "pair_id": "P_origin_8_P_origin_8_17_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_0"
+ },
+ {
+ "index": 209,
+ "pair_id": "P_origin_8_P_origin_8_17_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_1"
+ },
+ {
+ "index": 210,
+ "pair_id": "P_origin_8_P_origin_8_17_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_2"
+ },
+ {
+ "index": 211,
+ "pair_id": "P_origin_8_P_origin_8_17_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_3"
+ },
+ {
+ "index": 212,
+ "pair_id": "P_origin_8_P_origin_8_17_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_4"
+ },
+ {
+ "index": 213,
+ "pair_id": "P_origin_8_P_origin_8_17_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_5"
+ },
+ {
+ "index": 214,
+ "pair_id": "P_origin_8_P_origin_8_17_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_6"
+ },
+ {
+ "index": 215,
+ "pair_id": "P_origin_8_P_origin_8_17_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_17_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_17_7"
+ },
+ {
+ "index": 216,
+ "pair_id": "P_origin_8_P_origin_8_18",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_18.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_18"
+ },
+ {
+ "index": 217,
+ "pair_id": "P_origin_8_P_origin_8_19",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_19.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_19"
+ },
+ {
+ "index": 218,
+ "pair_id": "P_origin_8_P_origin_8_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_2"
+ },
+ {
+ "index": 219,
+ "pair_id": "P_origin_8_P_origin_8_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_3"
+ },
+ {
+ "index": 220,
+ "pair_id": "P_origin_8_P_origin_8_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_4"
+ },
+ {
+ "index": 221,
+ "pair_id": "P_origin_8_P_origin_8_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_5"
+ },
+ {
+ "index": 222,
+ "pair_id": "P_origin_8_P_origin_8_6",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_6.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_6"
+ },
+ {
+ "index": 223,
+ "pair_id": "P_origin_8_P_origin_8_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_7"
+ },
+ {
+ "index": 224,
+ "pair_id": "P_origin_8_P_origin_8_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_8"
+ },
+ {
+ "index": 225,
+ "pair_id": "P_origin_8_P_origin_8_9_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_9_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_9_0"
+ },
+ {
+ "index": 226,
+ "pair_id": "P_origin_8_P_origin_8_9_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_9_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_9_1"
+ },
+ {
+ "index": 227,
+ "pair_id": "P_origin_8_P_origin_8_9_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_8/P_origin_8_9_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_8",
+ "table_id": "P_origin_8_9_2"
+ },
+ {
+ "index": 228,
+ "pair_id": "P_origin_9_P_origin_9_0_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_0_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_0_0"
+ },
+ {
+ "index": 229,
+ "pair_id": "P_origin_9_P_origin_9_0_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_0_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_0_1"
+ },
+ {
+ "index": 230,
+ "pair_id": "P_origin_9_P_origin_9_0_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_0_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_0_2"
+ },
+ {
+ "index": 231,
+ "pair_id": "P_origin_9_P_origin_9_0_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_0_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_0_3"
+ },
+ {
+ "index": 232,
+ "pair_id": "P_origin_9_P_origin_9_10",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_10.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_10"
+ },
+ {
+ "index": 233,
+ "pair_id": "P_origin_9_P_origin_9_11",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_11.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_11"
+ },
+ {
+ "index": 234,
+ "pair_id": "P_origin_9_P_origin_9_1_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_1_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_1_0"
+ },
+ {
+ "index": 235,
+ "pair_id": "P_origin_9_P_origin_9_1_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_1_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_1_1"
+ },
+ {
+ "index": 236,
+ "pair_id": "P_origin_9_P_origin_9_1_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_1_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_1_2"
+ },
+ {
+ "index": 237,
+ "pair_id": "P_origin_9_P_origin_9_2_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_2_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_2_0"
+ },
+ {
+ "index": 238,
+ "pair_id": "P_origin_9_P_origin_9_2_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_2_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_2_1"
+ },
+ {
+ "index": 239,
+ "pair_id": "P_origin_9_P_origin_9_2_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_2_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_2_2"
+ },
+ {
+ "index": 240,
+ "pair_id": "P_origin_9_P_origin_9_3",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_3.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_3"
+ },
+ {
+ "index": 241,
+ "pair_id": "P_origin_9_P_origin_9_4",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_4.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_4"
+ },
+ {
+ "index": 242,
+ "pair_id": "P_origin_9_P_origin_9_5",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_5.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_5"
+ },
+ {
+ "index": 243,
+ "pair_id": "P_origin_9_P_origin_9_6_0",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_6_0.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_6_0"
+ },
+ {
+ "index": 244,
+ "pair_id": "P_origin_9_P_origin_9_6_1",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_6_1.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_6_1"
+ },
+ {
+ "index": 245,
+ "pair_id": "P_origin_9_P_origin_9_6_2",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_6_2.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_6_2"
+ },
+ {
+ "index": 246,
+ "pair_id": "P_origin_9_P_origin_9_7",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_7.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_7"
+ },
+ {
+ "index": 247,
+ "pair_id": "P_origin_9_P_origin_9_8",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_8.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_8"
+ },
+ {
+ "index": 248,
+ "pair_id": "P_origin_9_P_origin_9_9",
+ "image_paths": [
+ "data/Public/Table/P_origin_9/P_origin_9_9.png"
+ ],
+ "domain": "Public",
+ "origin": "P_origin_9",
+ "table_id": "P_origin_9_9"
+ }
+]
\ No newline at end of file
diff --git a/test_input.json b/test_input.json
deleted file mode 100644
index f51ecd0..0000000
--- a/test_input.json
+++ /dev/null
@@ -1,119 +0,0 @@
-[
- {
- "index": 0,
- "pair_id": "P_origin_0_1",
- "image_paths": [
- "data/Public/Table/P_origin_0/P_origin_0_1_0.png",
- "data/Public/Table/P_origin_0/P_origin_0_1_1.png"
- ],
- "domain": "public"
- },
- {
- "index": 1,
- "pair_id": "P_origin_0_2",
- "image_paths": [
- "data/Public/Table/P_origin_0/P_origin_0_2_1.png",
- "data/Public/Table/P_origin_0/P_origin_0_2_2.png"
- ],
- "domain": "public"
- },
- {
- "index": 2,
- "pair_id": "P_origin_1_0",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_0.png"
- ],
- "domain": "public"
- },
- {
- "index": 3,
- "pair_id": "P_origin_1_2",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_2.png"
- ],
- "domain": "public"
- },
- {
- "index": 4,
- "pair_id": "P_origin_1_4",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_4.png"
- ],
- "domain": "public"
- },
- {
- "index": 5,
- "pair_id": "P_origin_1_5",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_5.png"
- ],
- "domain": "public"
- },
- {
- "index": 6,
- "pair_id": "P_origin_1_7",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_7.png"
- ],
- "domain": "public"
- },
- {
- "index": 7,
- "pair_id": "P_origin_1_9",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_9_0.png",
- "data/Public/Table/P_origin_1/P_origin_1_9_1.png"
- ],
- "domain": "public"
- },
- {
- "index": 8,
- "pair_id": "P_origin_1_10",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_10_0.png",
- "data/Public/Table/P_origin_1/P_origin_1_10_1.png"
- ],
- "domain": "public"
- },
- {
- "index": 9,
- "pair_id": "P_origin_1_12",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_12_0.png",
- "data/Public/Table/P_origin_1/P_origin_1_12_1.png"
- ],
- "domain": "public"
- },
- {
- "index": 10,
- "pair_id": "P_origin_1_13",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_13_0.png"
- ],
- "domain": "public"
- },
- {
- "index": 11,
- "pair_id": "P_origin_1_14",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_14_0.png"
- ],
- "domain": "public"
- },
- {
- "index": 12,
- "pair_id": "P_origin_1_23",
- "image_paths": [
- "data/Public/Table/P_origin_1/P_origin_1_23_0.png"
- ],
- "domain": "public"
- },
- {
- "index": 13,
- "pair_id": "P_origin_4_6",
- "image_paths": [
- "data/Public/Table/P_origin_4/P_origin_4_6.png"
- ],
- "domain": "public"
- }
-]
\ No newline at end of file
diff --git a/tests/choi/QA_example/README.md b/tests/choi/QA_example/README.md
deleted file mode 100644
index 30f07f3..0000000
--- a/tests/choi/QA_example/README.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# QA Dataset Generation Module
-
-보험 테이블 마크다운 데이터를 기반으로 고품질 QA(Question-Answer) 데이터셋을 생성하는 모듈입니다.
-
-## 주요 기능
-
-### 1. 난이도별 QA 생성
-- **IR (Information Retrieval)**: 단순 정보 검색 (Level 1)
-- **Analysis**: 분석적 질문 (Level 2)
-- **Compare (Multi-hop)**: 비교 및 다중 추론 (Level 3)
-- **Aggregation**: 집계 연산 (Level 4)
-- **Reasoning**: 복합 추론 (Level 5)
-- **Insight**: 통찰 도출 (Level 6)
-
-### 2. 다양한 답변 유형
-- **Exact Match**: 단답형 (숫자, 예/아니오) - 정확한 매칭 평가
-- **Descriptive**: 서술형 - LLM-as-Judge 평가
-- **Calculation**: 수치 계산 결과 - Python 코드로 검증
-
-### 3. 고급 기능
-- **Multi-Table QA**: 복수 테이블 참조 필요 질문
-- **Follow-up QA**: 꼬리 질문 체인 생성
-- **Evol-Instruct**: 질문 난이도 진화
-- **LLM-as-Judge**: 품질 평가
-
-## 사용법
-
-### 기본 사용
-
-```python
-from QA_example import InsuranceTableQAGenerator, QADifficulty
-
-# 테이블 데이터 준비
-tables = {
- "table_1": "| 구분 | 값 |\n|---|---|\n| A | 100 |",
- "table_2": "| 항목 | 금액 |\n|---|---|\n| B | 200 |"
-}
-
-# Generator 초기화
-generator = InsuranceTableQAGenerator()
-
-# 특정 난이도 QA 생성
-ir_qa = generator.generate_qa_by_difficulty(tables, QADifficulty.IR, num_questions=3)
-
-# 종합 데이터셋 생성
-dataset = generator.generate_comprehensive_qa_dataset(
- tables,
- questions_per_difficulty=2,
- include_followup=True,
- include_evolution=True
-)
-```
-
-### 간편 함수 사용
-
-```python
-from QA_example import generate_qa_from_tables
-
-# 모든 난이도 QA 생성
-all_qa = generate_qa_from_tables(tables, num_questions=2)
-
-# 특정 난이도만 생성
-ir_only = generate_qa_from_tables(tables, difficulty=QADifficulty.IR)
-```
-
-## 커버되는 QA 양상
-
-| # | 양상 | 설명 | 구현 방식 |
-|---|------|------|----------|
-| 1 | Multi-table QA | 복수 테이블 참조 | `generate_multi_table_qa()` |
-| 2 | 난이도별 QA | 6단계 난이도 체계 | `QADifficulty` Enum |
-| 3 | 다양한 답변 유형 | Exact Match, Descriptive | `QAType` Enum |
-| 4 | 수치 계산 QA | 집계, 비율 계산 | Aggregation 난이도 |
-| 5 | 꼬리 질문 | Q-A 체인 | `generate_followup_qa()` |
-| 6 | 셀 기반 측정 | 여러 셀 기반 | Compare, Aggregation |
-| 7 | 특정 셀 Q-A | 단일 셀 검색 | IR 난이도 |
-| 8 | 이미지 연관 QA | 테이블 구조 기반 | 테이블 마크다운 입력 |
-
-## 파일 구조
-
-```
-QA_example/
-├── __init__.py # 모듈 초기화
-├── prompts.py # 프롬프트 템플릿
-├── qa_generator.py # QA 생성 핵심 로직
-├── qa_generation.ipynb # 사용 예제 노트북
-├── README.md # 이 문서
-└── output/ # 생성된 데이터셋
-```
-
-## 프롬프트 전략
-
-### Chain-of-Table
-- 단계별 표 해석 과정 명시
-- 동적 계획법 기반 질문 생성
-
-### Program-of-Thought (PoT)
-- 수치 계산을 Python 코드로 생성
-- 계산 결과의 무결성 보장
-
-### Tabular Chain-of-Thought
-- 추론 과정을 표 형태로 구조화
-- Step → Sub-question → Evidence → Reasoning
-
-### Evol-Instruct
-- 제약 조건 추가 (Adding Constraints)
-- 심층 추론 (Deepening Reasoning)
-- 구체화 (Concretizing)
-- 입력 복잡도 증가 (Complicating Input)
-
-## 품질 평가 (LLM-as-Judge)
-
-생성된 QA의 품질을 5가지 차원으로 평가:
-
-1. **정확성 (Correctness)**: 답변의 사실적 정확성
-2. **충실성 (Faithfulness)**: 테이블 데이터에 대한 충실도
-3. **관련성 (Relevance)**: 보험 도메인 실용성
-4. **난이도 적절성**: 표기 난이도와 실제 난이도 일치
-5. **명확성 (Clarity)**: 질문과 답변의 명확성
-
-## 의존성
-
-- `polling_gemini`: Gemini API 풀링 시스템
-- `google-generativeai`: Google Gemini API
-- `pyyaml`: YAML 설정 파일 처리
-
-## 라이센스
-
-MIT License
diff --git a/tests/choi/QA_example/__init__.py b/tests/choi/QA_example/__init__.py
deleted file mode 100644
index 53e6823..0000000
--- a/tests/choi/QA_example/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-QA Dataset Generation Module for Insurance Tables
-보험 테이블 기반 QA 데이터셋 생성 모듈
-"""
-
-from .qa_generator import (
- InsuranceTableQAGenerator,
- QADifficulty,
- QAType,
- generate_qa_from_tables,
-)
-
-__all__ = [
- 'InsuranceTableQAGenerator',
- 'QADifficulty',
- 'QAType',
- 'generate_qa_from_tables',
-]
-
-__version__ = '0.1.0'
diff --git a/tests/choi/QA_example/prompts.py b/tests/choi/QA_example/prompts.py
deleted file mode 100644
index 9adfeb5..0000000
--- a/tests/choi/QA_example/prompts.py
+++ /dev/null
@@ -1,479 +0,0 @@
-"""
-QA Generation Prompts for Insurance Table Data
-보험 테이블 기반 QA 생성을 위한 프롬프트 템플릿
-
-난이도별 QA 유형:
-1. IR (Information Retrieval): 단순 정보 검색
-2. Analysis: 분석적 질문
-3. Compare (Multi-hop): 비교 및 다중 추론
-4. Aggregation: 집계 연산
-5. Reasoning: 복합 추론
-6. Insight: 통찰 도출
-
-답변 유형:
-- Exact Match: 단답형 (숫자, 예/아니오)
-- Descriptive: 서술형 (LLM Judge 평가)
-"""
-
-# =============================================================================
-# System Prompts
-# =============================================================================
-
-QA_GENERATOR_SYSTEM_PROMPT = """# Role Definition
-당신은 보험 도메인 전문가이자 고품질 QA 데이터셋 구축 전문가입니다.
-주어진 보험 테이블 데이터를 기반으로 다양한 난이도와 유형의 질문-답변 쌍을 생성해야 합니다.
-
-# Core Principles
-1. **정확성(Accuracy):** 모든 답변은 주어진 테이블 데이터에 근거해야 합니다. 테이블에 없는 정보를 추측하지 마십시오.
-2. **다양성(Diversity):** 단순 검색부터 복잡한 추론까지 다양한 난이도의 질문을 생성해야 합니다.
-3. **실용성(Practicality):** 실제 보험 고객이 물어볼 수 있는 현실적인 질문을 생성해야 합니다.
-4. **명확성(Clarity):** 질문과 답변 모두 명확하고 모호하지 않아야 합니다.
-
-# Difficulty Levels (난이도)
-- **IR (Level 1):** 특정 셀의 값을 직접 찾는 단순 검색
-- **Analysis (Level 2):** 단일 테이블 내에서의 분석적 질문
-- **Compare (Level 3):** 여러 행/열 또는 복수 테이블 간 비교
-- **Aggregation (Level 4):** 합계, 평균, 최대/최소 등의 집계 연산
-- **Reasoning (Level 5):** 여러 정보를 종합한 복합 추론
-- **Insight (Level 6):** 데이터로부터 통찰이나 시사점 도출
-
-# Answer Types (답변 유형)
-- **exact_match:** 숫자, 예/아니오, 특정 텍스트 등 정확히 일치해야 하는 답변
-- **descriptive:** 설명이 필요한 서술형 답변 (LLM-as-Judge로 평가)
-- **calculation:** 수치 계산 결과 (계산 과정 포함)
-- **comparison:** 비교 결과 및 근거"""
-
-# =============================================================================
-# QA Generation Prompts by Difficulty
-# =============================================================================
-
-IR_QA_PROMPT = """## Task: Information Retrieval (IR) Level QA 생성
-단일 테이블에서 특정 셀의 값을 직접 검색하는 간단한 QA를 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 특정 행과 열이 교차하는 지점의 값을 묻는 질문
-2. 단답형으로 대답 가능한 질문
-3. 테이블에서 바로 찾을 수 있는 정보만 질문
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "IR_001",
- "difficulty": "IR",
- "answer_type": "exact_match",
- "question": "질문 내용",
- "answer": "정확한 답변",
- "evidence": {{
- "table_id": "table_1",
- "row": "행 정보",
- "column": "열 정보"
- }},
- "tags": ["single_cell", "numeric"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} IR-level QA pairs."""
-
-ANALYSIS_QA_PROMPT = """## Task: Analysis Level QA 생성
-단일 테이블 내에서 데이터를 분석하는 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 특정 조건을 만족하는 행/열 찾기
-2. 최대값/최소값을 가진 항목 식별
-3. 특정 범위 내의 데이터 확인
-4. 단답형 또는 짧은 설명형 답변
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "ANALYSIS_001",
- "difficulty": "Analysis",
- "answer_type": "exact_match",
- "question": "질문 내용",
- "answer": "정확한 답변",
- "reasoning": "답을 도출하는 과정 설명",
- "evidence": {{
- "table_id": "table_1",
- "relevant_cells": ["셀 위치1", "셀 위치2"]
- }},
- "tags": ["conditional_search", "extrema"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Analysis-level QA pairs."""
-
-COMPARE_QA_PROMPT = """## Task: Compare (Multi-hop) Level QA 생성
-여러 행, 열, 또는 복수 테이블 간 비교가 필요한 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 두 개 이상의 셀 값을 비교하는 질문
-2. 시계열 변화를 비교하는 질문
-3. 복수 테이블의 정보를 연결하는 질문
-4. 차이, 비율, 증감률 등을 묻는 질문
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "COMPARE_001",
- "difficulty": "Compare",
- "answer_type": "calculation",
- "question": "질문 내용",
- "answer": "정확한 답변",
- "calculation": "계산 과정",
- "reasoning": "비교 논리 설명",
- "evidence": {{
- "table_ids": ["table_1", "table_2"],
- "compared_cells": [
- {{"table": "table_1", "row": "행1", "column": "열1", "value": "값1"}},
- {{"table": "table_1", "row": "행2", "column": "열2", "value": "값2"}}
- ]
- }},
- "tags": ["multi_hop", "comparison", "calculation"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Compare-level QA pairs."""
-
-AGGREGATION_QA_PROMPT = """## Task: Aggregation Level QA 생성
-합계, 평균, 누적값 등 집계 연산이 필요한 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 특정 열/행의 합계를 구하는 질문
-2. 평균값을 계산하는 질문
-3. 누적 증가율을 구하는 질문
-4. 조건부 집계 (특정 조건을 만족하는 항목들의 합계 등)
-
-### Python Code for Verification
-답변의 정확성 검증을 위해 Python 코드도 함께 생성하세요.
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "AGG_001",
- "difficulty": "Aggregation",
- "answer_type": "calculation",
- "question": "질문 내용",
- "answer": "정확한 수치 답변",
- "calculation": "단계별 계산 과정",
- "python_verification": "import pandas as pd\\n# 검증 코드",
- "evidence": {{
- "table_id": "table_1",
- "aggregated_cells": ["셀1", "셀2", "셀3"]
- }},
- "tags": ["aggregation", "sum", "average"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Aggregation-level QA pairs."""
-
-REASONING_QA_PROMPT = """## Task: Reasoning Level QA 생성
-여러 정보를 종합하여 복합적인 추론이 필요한 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 조건부 로직을 적용한 추론 질문
-2. 가정(Assumption)을 포함한 시나리오 기반 질문
-3. 인과관계를 파악하는 질문
-4. 여러 단계의 논리적 추론이 필요한 질문
-
-### Chain-of-Thought Reasoning
-답변 도출 과정을 단계별로 명시하세요.
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "REASON_001",
- "difficulty": "Reasoning",
- "answer_type": "descriptive",
- "question": "질문 내용",
- "answer": "답변",
- "chain_of_thought": [
- "Step 1: ...",
- "Step 2: ...",
- "Step 3: ..."
- ],
- "assumptions": ["가정1", "가정2"],
- "evidence": {{
- "table_ids": ["table_1"],
- "relevant_data": ["관련 데이터 포인트"]
- }},
- "tags": ["multi_step_reasoning", "conditional_logic"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Reasoning-level QA pairs."""
-
-INSIGHT_QA_PROMPT = """## Task: Insight Level QA 생성
-데이터로부터 통찰이나 시사점을 도출하는 고난도 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 데이터 추세(Trend)를 파악하는 질문
-2. 이상치(Anomaly)나 특이 패턴을 발견하는 질문
-3. 데이터 기반 예측이나 권고를 요청하는 질문
-4. 비즈니스적 함의를 도출하는 질문
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "INSIGHT_001",
- "difficulty": "Insight",
- "answer_type": "descriptive",
- "question": "질문 내용",
- "answer": "통찰 및 답변",
- "supporting_analysis": "분석 과정",
- "key_findings": ["발견1", "발견2"],
- "evidence": {{
- "table_ids": ["table_1", "table_2"],
- "data_points": ["근거 데이터"]
- }},
- "tags": ["trend_analysis", "insight", "recommendation"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Insight-level QA pairs."""
-
-# =============================================================================
-# Follow-up Question Prompts (꼬리 질문)
-# =============================================================================
-
-FOLLOWUP_QA_PROMPT = """## Task: Follow-up Question (꼬리 질문) 생성
-주어진 초기 QA에 대해 연속적인 후속 질문을 생성하세요.
-
-### Original QA
-{original_qa}
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 원래 질문의 맥락을 유지하면서 심화된 질문
-2. 원래 답변에서 파생되는 추가 질문
-3. 관련된 다른 데이터 포인트를 탐색하는 질문
-4. 2-3개의 연속적인 후속 질문 체인 생성
-
-### Output Format (JSON)
-```json
-{{
- "original_qa": {{
- "question": "원래 질문",
- "answer": "원래 답변"
- }},
- "followup_chain": [
- {{
- "id": "FOLLOWUP_001_1",
- "question": "후속 질문 1",
- "answer": "답변 1",
- "reasoning": "이전 답변과의 연결고리"
- }},
- {{
- "id": "FOLLOWUP_001_2",
- "question": "후속 질문 2 (질문1 기반)",
- "answer": "답변 2",
- "reasoning": "이전 답변과의 연결고리"
- }}
- ]
-}}
-```
-
-### Generate follow-up questions chain."""
-
-# =============================================================================
-# Multi-Table QA Prompts
-# =============================================================================
-
-MULTI_TABLE_QA_PROMPT = """## Task: Multi-Table QA 생성
-복수의 테이블을 참조해야 답변 가능한 질문을 생성하세요.
-
-### Input Tables
-{tables}
-
-### Requirements
-1. 반드시 2개 이상의 테이블 정보를 조합해야 답변 가능한 질문
-2. 테이블 간 연결 키(Key)를 활용한 질문
-3. 서로 다른 테이블의 수치를 비교/연산하는 질문
-4. 종합적인 분석이 필요한 질문
-
-### Output Format (JSON)
-```json
-{{
- "questions": [
- {{
- "id": "MULTI_001",
- "difficulty": "Compare",
- "answer_type": "calculation",
- "question": "질문 내용",
- "answer": "답변",
- "required_tables": ["table_1", "table_2"],
- "join_logic": "테이블 연결 방법 설명",
- "reasoning": "답변 도출 과정",
- "tags": ["multi_table", "join", "cross_reference"]
- }}
- ]
-}}
-```
-
-### Generate {num_questions} Multi-Table QA pairs."""
-
-# =============================================================================
-# Evol-Instruct Prompts (난이도 진화)
-# =============================================================================
-
-EVOL_INSTRUCT_PROMPT = """## Task: Evol-Instruct - 질문 난이도 진화
-주어진 기본 질문을 더 복잡하고 도전적인 질문으로 진화시키세요.
-
-### Original Question
-{original_question}
-
-### Evolution Strategies
-다음 전략 중 하나 이상을 적용하여 질문을 진화시키세요:
-
-1. **제약 조건 추가 (Adding Constraints):**
- - 특정 조건(나이, 기간, 금액 범위 등)을 추가
-
-2. **심층 추론 (Deepening Reasoning):**
- - 다단계 논리적 사고를 요구하도록 변환
-
-3. **구체화 (Concretizing):**
- - 추상적 질문을 구체적 시나리오로 대체
-
-4. **입력 복잡도 증가 (Complicating Input):**
- - 복수 테이블이나 추가 조건을 참조하도록 변환
-
-### Input Tables
-{tables}
-
-### Output Format (JSON)
-```json
-{{
- "original": {{
- "question": "원래 질문",
- "difficulty": "원래 난이도"
- }},
- "evolved": {{
- "question": "진화된 질문",
- "difficulty": "새로운 난이도",
- "evolution_strategy": "적용된 전략",
- "answer": "새로운 답변",
- "reasoning": "답변 도출 과정"
- }}
-}}
-```
-
-### Evolve the question."""
-
-# =============================================================================
-# Quality Evaluation Prompts (LLM-as-Judge)
-# =============================================================================
-
-QA_EVALUATION_PROMPT = """## Task: QA 품질 평가 (LLM-as-Judge)
-생성된 QA 쌍의 품질을 다면적으로 평가하세요.
-
-### QA to Evaluate
-{qa_pair}
-
-### Reference Tables
-{tables}
-
-### Evaluation Criteria (1-5점 척도)
-
-1. **정확성 (Correctness):**
- - 답변이 테이블 데이터에 정확히 근거하는가?
- - 수치 계산이 정확한가?
-
-2. **충실성 (Faithfulness):**
- - 테이블에 없는 정보를 날조(Hallucination)하지 않았는가?
- - 근거 데이터가 명확한가?
-
-3. **관련성 (Relevance):**
- - 질문이 보험 도메인에서 실용적인가?
- - 실제 고객이 물어볼 법한 질문인가?
-
-4. **난이도 적절성 (Difficulty Appropriateness):**
- - 표기된 난이도와 실제 난이도가 일치하는가?
-
-5. **명확성 (Clarity):**
- - 질문과 답변이 명확하고 모호하지 않은가?
-
-### Output Format (JSON)
-```json
-{{
- "evaluation": {{
- "correctness": {{"score": 5, "comment": "평가 코멘트"}},
- "faithfulness": {{"score": 5, "comment": "평가 코멘트"}},
- "relevance": {{"score": 5, "comment": "평가 코멘트"}},
- "difficulty_appropriateness": {{"score": 5, "comment": "평가 코멘트"}},
- "clarity": {{"score": 5, "comment": "평가 코멘트"}}
- }},
- "overall_score": 5.0,
- "pass": true,
- "improvement_suggestions": ["개선 제안1", "개선 제안2"]
-}}
-```
-
-### Evaluate the QA pair."""
-
-
-# =============================================================================
-# Helper Functions
-# =============================================================================
-
-def get_qa_prompt_by_difficulty(difficulty: str) -> str:
- """난이도에 따른 프롬프트 반환"""
- prompts = {
- "IR": IR_QA_PROMPT,
- "Analysis": ANALYSIS_QA_PROMPT,
- "Compare": COMPARE_QA_PROMPT,
- "Aggregation": AGGREGATION_QA_PROMPT,
- "Reasoning": REASONING_QA_PROMPT,
- "Insight": INSIGHT_QA_PROMPT,
- }
- return prompts.get(difficulty, IR_QA_PROMPT)
-
-
-def format_tables_for_prompt(tables: dict) -> str:
- """테이블 딕셔너리를 프롬프트용 문자열로 변환"""
- formatted = []
- for table_id, table_content in tables.items():
- formatted.append(f"### {table_id}\n```markdown\n{table_content}\n```\n")
- return "\n".join(formatted)
diff --git a/tests/choi/QA_example/qa_generation.ipynb b/tests/choi/QA_example/qa_generation.ipynb
deleted file mode 100644
index 34481fa..0000000
--- a/tests/choi/QA_example/qa_generation.ipynb
+++ /dev/null
@@ -1,1432 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "8386b6ee",
- "metadata": {},
- "source": [
- "## 1. 환경 설정"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "8972c3e2",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "프로젝트 루트: /Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier\n",
- "현재 작업 디렉토리: /Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier/QA_example\n"
- ]
- }
- ],
- "source": [
- "import sys\n",
- "import json\n",
- "import asyncio\n",
- "from pathlib import Path\n",
- "from datetime import datetime\n",
- "\n",
- "# 프로젝트 루트 경로 설정\n",
- "project_root = Path.cwd().parent\n",
- "if str(project_root) not in sys.path:\n",
- " sys.path.insert(0, str(project_root))\n",
- "\n",
- "print(f\"프로젝트 루트: {project_root}\")\n",
- "print(f\"현재 작업 디렉토리: {Path.cwd()}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "ca7782da",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ 모듈 임포트 완료 (리로드됨)\n"
- ]
- }
- ],
- "source": [
- "# QA Generator 모듈 임포트 (변경 시 리로드)\n",
- "import importlib\n",
- "import QA_example.qa_generator\n",
- "importlib.reload(QA_example.qa_generator)\n",
- "\n",
- "from QA_example.qa_generator import (\n",
- " InsuranceTableQAGenerator,\n",
- " QADifficulty,\n",
- " QAType,\n",
- " generate_qa_from_tables,\n",
- ")\n",
- "\n",
- "# Gemini API Pool 임포트\n",
- "from polling_gemini import get_gemini_pool\n",
- "\n",
- "print(\"✅ 모듈 임포트 완료 (리로드됨)\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "68b1f92e",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "📊 API Pool 상태:\n",
- " 현재 키: key1\n",
- " 총 키 수: 3\n"
- ]
- }
- ],
- "source": [
- "# API Pool 상태 확인\n",
- "pool = get_gemini_pool()\n",
- "print(\"📊 API Pool 상태:\")\n",
- "print(f\" 현재 키: {pool.get_current_key_info()['name']}\")\n",
- "print(f\" 총 키 수: {pool.get_current_key_info()['total_keys']}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c38801d5",
- "metadata": {},
- "source": [
- "## 2. 샘플 테이블 데이터 준비\n",
- "\n",
- "보험 도메인의 다양한 테이블 예시를 준비합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "504b0780",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ 샘플 테이블 데이터 준비 완료\n",
- " - TABLE_1: 보험료 산출 기초율\n",
- " - TABLE_2: 해지환급금 예시\n",
- " - TABLE_3: 보장 내역\n",
- " - TABLE_4: 연령별 보험료\n"
- ]
- }
- ],
- "source": [
- "# 샘플 테이블 1: 보험료 산출 기초율 테이블 (실제 데이터 기반)\n",
- "TABLE_1_PREMIUM_CALCULATION = \"\"\"\n",
- "|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|\n",
- "|---|---|---|---|---|---|---|\n",
- "|나이증가분(A)||1059|1357|1739|2229|2855|\n",
- "|보험료 산출 기초율(위험률 등) 증가분(B=전년도 기준보험료의 최대 25% 가정)||10846|13897|17806|22815|29232|\n",
- "|기준보험료(C=전년도 기준보험료+A+B)|42325|54321|69485|89030|114074|146161|\n",
- "\"\"\".strip()\n",
- "\n",
- "# 샘플 테이블 2: 해지환급금 예시표\n",
- "TABLE_2_SURRENDER_VALUE = \"\"\"\n",
- "|경과기간|납입보험료 누계|해지환급금|환급률|\n",
- "|---|---|---|---|\n",
- "|1년|600000|0|0%|\n",
- "|3년|1800000|540000|30%|\n",
- "|5년|3000000|1650000|55%|\n",
- "|10년|6000000|4800000|80%|\n",
- "|15년|9000000|8550000|95%|\n",
- "|20년(만기)|12000000|12000000|100%|\n",
- "\"\"\".strip()\n",
- "\n",
- "# 샘플 테이블 3: 보장 내역표\n",
- "TABLE_3_COVERAGE = \"\"\"\n",
- "|보장항목|보장내용|지급금액|지급조건|\n",
- "|---|---|---|---|\n",
- "|사망보험금|일반사망|50000000|피보험자 사망시|\n",
- "|사망보험금|재해사망|100000000|재해로 인한 사망시|\n",
- "|암진단금|일반암|30000000|암 최초 진단시|\n",
- "|암진단금|소액암|6000000|소액암 진단시|\n",
- "|암진단금|유사암|3000000|유사암 진단시|\n",
- "|입원비|일반입원|50000|1일당 (최대 180일)|\n",
- "|입원비|암입원|100000|1일당 (최대 180일)|\n",
- "|수술비|일반수술|500000|1회당|\n",
- "|수술비|암수술|2000000|1회당|\n",
- "\"\"\".strip()\n",
- "\n",
- "# 샘플 테이블 4: 연령별 보험료 예시\n",
- "TABLE_4_AGE_PREMIUM = \"\"\"\n",
- "|가입연령|성별|20년납_월보험료|전기납_월보험료|일시납_총보험료|\n",
- "|---|---|---|---|---|\n",
- "|30세|남|45000|38000|8500000|\n",
- "|30세|여|42000|35000|7800000|\n",
- "|40세|남|65000|52000|11500000|\n",
- "|40세|여|58000|47000|10200000|\n",
- "|50세|남|95000|75000|16000000|\n",
- "|50세|여|82000|65000|14000000|\n",
- "\"\"\".strip()\n",
- "\n",
- "print(\"✅ 샘플 테이블 데이터 준비 완료\")\n",
- "print(f\" - TABLE_1: 보험료 산출 기초율\")\n",
- "print(f\" - TABLE_2: 해지환급금 예시\")\n",
- "print(f\" - TABLE_3: 보장 내역\")\n",
- "print(f\" - TABLE_4: 연령별 보험료\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "c22033b5",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "📋 table_1_premium_calculation\n",
- "============================================================\n"
- ]
- },
- {
- "data": {
- "text/markdown": [
- "|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|\n",
- "|---|---|---|---|---|---|---|\n",
- "|나이증가분(A)||1059|1357|1739|2229|2855|\n",
- "|보험료 산출 기초율(위험률 등) 증가분(B=전년도 기준보험료의 최대 25% 가정)||10846|13897|17806|22815|29232|\n",
- "|기준보험료(C=전년도 기준보험료+A+B)|42325|54321|69485|89030|114074|146161|"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "📋 table_2_surrender_value\n",
- "============================================================\n"
- ]
- },
- {
- "data": {
- "text/markdown": [
- "|경과기간|납입보험료 누계|해지환급금|환급률|\n",
- "|---|---|---|---|\n",
- "|1년|600000|0|0%|\n",
- "|3년|1800000|540000|30%|\n",
- "|5년|3000000|1650000|55%|\n",
- "|10년|6000000|4800000|80%|\n",
- "|15년|9000000|8550000|95%|\n",
- "|20년(만기)|12000000|12000000|100%|"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "📋 table_3_coverage\n",
- "============================================================\n"
- ]
- },
- {
- "data": {
- "text/markdown": [
- "|보장항목|보장내용|지급금액|지급조건|\n",
- "|---|---|---|---|\n",
- "|사망보험금|일반사망|50000000|피보험자 사망시|\n",
- "|사망보험금|재해사망|100000000|재해로 인한 사망시|\n",
- "|암진단금|일반암|30000000|암 최초 진단시|\n",
- "|암진단금|소액암|6000000|소액암 진단시|\n",
- "|암진단금|유사암|3000000|유사암 진단시|\n",
- "|입원비|일반입원|50000|1일당 (최대 180일)|\n",
- "|입원비|암입원|100000|1일당 (최대 180일)|\n",
- "|수술비|일반수술|500000|1회당|\n",
- "|수술비|암수술|2000000|1회당|"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "📋 table_4_age_premium\n",
- "============================================================\n"
- ]
- },
- {
- "data": {
- "text/markdown": [
- "|가입연령|성별|20년납_월보험료|전기납_월보험료|일시납_총보험료|\n",
- "|---|---|---|---|---|\n",
- "|30세|남|45000|38000|8500000|\n",
- "|30세|여|42000|35000|7800000|\n",
- "|40세|남|65000|52000|11500000|\n",
- "|40세|여|58000|47000|10200000|\n",
- "|50세|남|95000|75000|16000000|\n",
- "|50세|여|82000|65000|14000000|"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# 테이블 딕셔너리 구성\n",
- "tables = {\n",
- " \"table_1_premium_calculation\": TABLE_1_PREMIUM_CALCULATION,\n",
- " \"table_2_surrender_value\": TABLE_2_SURRENDER_VALUE,\n",
- " \"table_3_coverage\": TABLE_3_COVERAGE,\n",
- " \"table_4_age_premium\": TABLE_4_AGE_PREMIUM,\n",
- "}\n",
- "\n",
- "# 테이블 미리보기\n",
- "from IPython.display import display, Markdown\n",
- "\n",
- "for table_id, content in tables.items():\n",
- " print(f\"\\n{'='*60}\")\n",
- " print(f\"📋 {table_id}\")\n",
- " print('='*60)\n",
- " display(Markdown(content))"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0480f78c",
- "metadata": {},
- "source": [
- "## 3. QA Generator 초기화"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "d4d5e0c6",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ QA Generator 초기화 완료\n",
- " 사용 모델: gemini-2.0-flash\n"
- ]
- }
- ],
- "source": [
- "# QA Generator 인스턴스 생성\n",
- "qa_generator = InsuranceTableQAGenerator(\n",
- " model_name=\"gemini-2.0-flash\" # 또는 \"gemini-1.5-flash\"\n",
- ")\n",
- "\n",
- "print(\"✅ QA Generator 초기화 완료\")\n",
- "print(f\" 사용 모델: gemini-2.0-flash\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f21cb567",
- "metadata": {},
- "source": [
- "## 4. 난이도별 QA 생성\n",
- "\n",
- "### 4.1 IR (Information Retrieval) - 단순 정보 검색"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "11267214",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🔍 IR (Information Retrieval) QA 생성 중...\n",
- "\n",
- "✅ 3개의 IR QA 생성 완료\n",
- "\n",
- "📌 [IR_001] IR\n",
- " Q: XX세의 기준보험료는 얼마인가요?\n",
- " A: 42325\n",
- " Evidence: {'table_id': 'table_1_premium_calculation', 'row': '기준보험료(C=전년도 기준보험료+A+B)', 'column': 'XX세'}\n",
- "\n",
- "📌 [IR_002] IR\n",
- " Q: 경과기간이 1년일 때 해지환급금은 얼마인가요?\n",
- " A: 0\n",
- " Evidence: {'table_id': 'table_2_surrender_value', 'row': '1년', 'column': '해지환급금'}\n",
- "\n",
- "📌 [IR_003] IR\n",
- " Q: 30세 남성의 20년납 월보험료는 얼마인가요?\n",
- " A: 45000\n",
- " Evidence: {'table_id': 'table_4_age_premium', 'row': '30세 (남)', 'column': '20년납_월보험료'}\n",
- "\n",
- "✅ 3개의 IR QA 생성 완료\n",
- "\n",
- "📌 [IR_001] IR\n",
- " Q: XX세의 기준보험료는 얼마인가요?\n",
- " A: 42325\n",
- " Evidence: {'table_id': 'table_1_premium_calculation', 'row': '기준보험료(C=전년도 기준보험료+A+B)', 'column': 'XX세'}\n",
- "\n",
- "📌 [IR_002] IR\n",
- " Q: 경과기간이 1년일 때 해지환급금은 얼마인가요?\n",
- " A: 0\n",
- " Evidence: {'table_id': 'table_2_surrender_value', 'row': '1년', 'column': '해지환급금'}\n",
- "\n",
- "📌 [IR_003] IR\n",
- " Q: 30세 남성의 20년납 월보험료는 얼마인가요?\n",
- " A: 45000\n",
- " Evidence: {'table_id': 'table_4_age_premium', 'row': '30세 (남)', 'column': '20년납_월보험료'}\n"
- ]
- }
- ],
- "source": [
- "# IR 난이도 QA 생성 (단답형, 특정 셀 검색)\n",
- "print(\"🔍 IR (Information Retrieval) QA 생성 중...\")\n",
- "\n",
- "ir_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.IR,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(ir_qa_pairs)}개의 IR QA 생성 완료\")\n",
- "for qa in ir_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.evidence:\n",
- " print(f\" Evidence: {qa.evidence}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6c4f477c",
- "metadata": {},
- "source": [
- "### 4.2 Analysis - 분석적 질문"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "647c157f",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "📊 Analysis QA 생성 중...\n",
- "\n",
- "✅ 3개의 Analysis QA 생성 완료\n",
- "\n",
- "📌 [ANALYSIS_001] Analysis\n",
- " Q: 암진단금 보장항목 중에서 가장 높은 지급금액을 제공하는 보장내용은 무엇인가요?\n",
- " A: 일반암\n",
- " Reasoning: table_3_coverage에서 '보장항목'이 '암진단금'인 행들을 확인하고, 그 중 '지급금액'이 가장 높은 '보장내용'을 찾습니다. 일반암은 30,000,000원, 소액암은 6,000,000원, 유사암은 3,000,000원이므로 일반암이 가장 높습니다.\n",
- "\n",
- "📌 [ANALYSIS_002] Analysis\n",
- " Q: 해지환급률이 50% 이상이 되려면 최소 몇 년의 경과기간이 필요합니까?\n",
- " A: 5년\n",
- " Reasoning: table_2_surrender_value에서 '환급률'이 50% 이상인 행들을 찾고, 그 중 가장 작은 '경과기간'을 확인합니다. 3년 경과 시 환급률은 30%이고, 5년 경과 시 환급률은 55%이므로 최소 5년이 필요합니다.\n",
- "\n",
- "📌 [ANALYSIS_003] Analysis\n",
- " Q: 40세 가입자의 경우, 남성과 여성 중 누가 20년납_월보험료가 더 높은가요?\n",
- " A: 남성\n",
- " Reasoning: table_4_age_premium에서 '가입연령'이 40세인 행들을 찾고, 해당 행들의 '20년납_월보험료'를 비교합니다. 40세 남성의 20년납_월보험료는 65,000원이고, 40세 여성의 20년납_월보험료는 58,000원이므로 남성이 더 높습니다.\n",
- "\n",
- "✅ 3개의 Analysis QA 생성 완료\n",
- "\n",
- "📌 [ANALYSIS_001] Analysis\n",
- " Q: 암진단금 보장항목 중에서 가장 높은 지급금액을 제공하는 보장내용은 무엇인가요?\n",
- " A: 일반암\n",
- " Reasoning: table_3_coverage에서 '보장항목'이 '암진단금'인 행들을 확인하고, 그 중 '지급금액'이 가장 높은 '보장내용'을 찾습니다. 일반암은 30,000,000원, 소액암은 6,000,000원, 유사암은 3,000,000원이므로 일반암이 가장 높습니다.\n",
- "\n",
- "📌 [ANALYSIS_002] Analysis\n",
- " Q: 해지환급률이 50% 이상이 되려면 최소 몇 년의 경과기간이 필요합니까?\n",
- " A: 5년\n",
- " Reasoning: table_2_surrender_value에서 '환급률'이 50% 이상인 행들을 찾고, 그 중 가장 작은 '경과기간'을 확인합니다. 3년 경과 시 환급률은 30%이고, 5년 경과 시 환급률은 55%이므로 최소 5년이 필요합니다.\n",
- "\n",
- "📌 [ANALYSIS_003] Analysis\n",
- " Q: 40세 가입자의 경우, 남성과 여성 중 누가 20년납_월보험료가 더 높은가요?\n",
- " A: 남성\n",
- " Reasoning: table_4_age_premium에서 '가입연령'이 40세인 행들을 찾고, 해당 행들의 '20년납_월보험료'를 비교합니다. 40세 남성의 20년납_월보험료는 65,000원이고, 40세 여성의 20년납_월보험료는 58,000원이므로 남성이 더 높습니다.\n"
- ]
- }
- ],
- "source": [
- "# Analysis 난이도 QA 생성\n",
- "print(\"📊 Analysis QA 생성 중...\")\n",
- "\n",
- "analysis_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.ANALYSIS,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(analysis_qa_pairs)}개의 Analysis QA 생성 완료\")\n",
- "for qa in analysis_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.reasoning:\n",
- " print(f\" Reasoning: {qa.reasoning}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7c744604",
- "metadata": {},
- "source": [
- "### 4.3 Compare (Multi-hop) - 비교 및 다중 추론"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "597806d6",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "⚖️ Compare (Multi-hop) QA 생성 중...\n",
- "\n",
- "✅ 3개의 Compare QA 생성 완료\n",
- "\n",
- "📌 [COMPARE_001] Compare\n",
- " Q: table_1_premium_calculation에서 XX+1세의 경우, 보험료 산출 기초율 증가분(B)은 나이증가분(A)보다 얼마나 더 큰가요?\n",
- " A: 9,787원 더 큽니다.\n",
- " Reasoning: XX+1세의 보험료 산출 기초율 증가분(B) 값에서 나이증가분(A) 값을 차감하여 그 차이를 계산합니다.\n",
- "\n",
- "📌 [COMPARE_002] Compare\n",
- " Q: table_2_surrender_value에서 보험 가입 후 5년 경과 시점과 10년 경과 시점의 해지환급금은 얼마나 차이가 나나요?\n",
- " A: 3,150,000원 차이가 납니다.\n",
- " Reasoning: 10년 경과 시점의 해지환급금에서 5년 경과 시점의 해지환급금을 차감하여 차이를 계산합니다.\n",
- "\n",
- "📌 [COMPARE_003] Compare\n",
- " Q: table_4_age_premium에서 30세 남성이 20년납 월보험료로 가입했을 경우, 20년 만기 시점의 총 납입보험료와 table_2_surrender_value의 20년 경과 시점 해지환급금은 얼마나 차이가 나나요?\n",
- " A: 1,200,000원 차이가 납니다.\n",
- " Reasoning: table_4에서 30세 남성의 20년납 월보험료를 찾아 20년간의 총 납입보험료를 계산하고, table_2에서 20년 경과 시점의 해지환급금을 찾아 두 값의 차이를 계산합니다.\n",
- "\n",
- "✅ 3개의 Compare QA 생성 완료\n",
- "\n",
- "📌 [COMPARE_001] Compare\n",
- " Q: table_1_premium_calculation에서 XX+1세의 경우, 보험료 산출 기초율 증가분(B)은 나이증가분(A)보다 얼마나 더 큰가요?\n",
- " A: 9,787원 더 큽니다.\n",
- " Reasoning: XX+1세의 보험료 산출 기초율 증가분(B) 값에서 나이증가분(A) 값을 차감하여 그 차이를 계산합니다.\n",
- "\n",
- "📌 [COMPARE_002] Compare\n",
- " Q: table_2_surrender_value에서 보험 가입 후 5년 경과 시점과 10년 경과 시점의 해지환급금은 얼마나 차이가 나나요?\n",
- " A: 3,150,000원 차이가 납니다.\n",
- " Reasoning: 10년 경과 시점의 해지환급금에서 5년 경과 시점의 해지환급금을 차감하여 차이를 계산합니다.\n",
- "\n",
- "📌 [COMPARE_003] Compare\n",
- " Q: table_4_age_premium에서 30세 남성이 20년납 월보험료로 가입했을 경우, 20년 만기 시점의 총 납입보험료와 table_2_surrender_value의 20년 경과 시점 해지환급금은 얼마나 차이가 나나요?\n",
- " A: 1,200,000원 차이가 납니다.\n",
- " Reasoning: table_4에서 30세 남성의 20년납 월보험료를 찾아 20년간의 총 납입보험료를 계산하고, table_2에서 20년 경과 시점의 해지환급금을 찾아 두 값의 차이를 계산합니다.\n"
- ]
- }
- ],
- "source": [
- "# Compare 난이도 QA 생성 (Multi-hop 추론)\n",
- "print(\"⚖️ Compare (Multi-hop) QA 생성 중...\")\n",
- "\n",
- "compare_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.COMPARE,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(compare_qa_pairs)}개의 Compare QA 생성 완료\")\n",
- "for qa in compare_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.reasoning:\n",
- " print(f\" Reasoning: {qa.reasoning}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c91c5b2e",
- "metadata": {},
- "source": [
- "### 4.4 Aggregation - 집계 연산"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "c8a555ce",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "➕ Aggregation QA 생성 중...\n",
- "\n",
- "✅ 3개의 Aggregation QA 생성 완료\n",
- "\n",
- "📌 [AGG_001] Aggregation\n",
- " Q: table_1_premium_calculation에서 XX+1세부터 XX+5세까지 '나이증가분(A)'의 총합은 얼마인가요?\n",
- " A: 9239\n",
- " Python 검증 코드 포함: ✅\n",
- "\n",
- "📌 [AGG_002] Aggregation\n",
- " Q: table_2_surrender_value에서 경과기간 1년, 3년, 5년, 10년, 15년, 20년 시점의 '해지환급금'의 평균은 얼마인가요?\n",
- " A: 4590000\n",
- " Python 검증 코드 포함: ✅\n",
- "\n",
- "📌 [AGG_003] Aggregation\n",
- " Q: table_4_age_premium에서 30세 남성이 20년납으로 가입했을 경우, 20년 동안 총 납입해야 할 보험료는 얼마인가요?\n",
- " A: 10800000\n",
- " Python 검증 코드 포함: ✅\n",
- "\n",
- "✅ 3개의 Aggregation QA 생성 완료\n",
- "\n",
- "📌 [AGG_001] Aggregation\n",
- " Q: table_1_premium_calculation에서 XX+1세부터 XX+5세까지 '나이증가분(A)'의 총합은 얼마인가요?\n",
- " A: 9239\n",
- " Python 검증 코드 포함: ✅\n",
- "\n",
- "📌 [AGG_002] Aggregation\n",
- " Q: table_2_surrender_value에서 경과기간 1년, 3년, 5년, 10년, 15년, 20년 시점의 '해지환급금'의 평균은 얼마인가요?\n",
- " A: 4590000\n",
- " Python 검증 코드 포함: ✅\n",
- "\n",
- "📌 [AGG_003] Aggregation\n",
- " Q: table_4_age_premium에서 30세 남성이 20년납으로 가입했을 경우, 20년 동안 총 납입해야 할 보험료는 얼마인가요?\n",
- " A: 10800000\n",
- " Python 검증 코드 포함: ✅\n"
- ]
- }
- ],
- "source": [
- "# Aggregation 난이도 QA 생성 (수치 계산)\n",
- "print(\"➕ Aggregation QA 생성 중...\")\n",
- "\n",
- "agg_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.AGGREGATION,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(agg_qa_pairs)}개의 Aggregation QA 생성 완료\")\n",
- "for qa in agg_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.python_verification:\n",
- " print(f\" Python 검증 코드 포함: ✅\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "05f6c85e",
- "metadata": {},
- "source": [
- "### 4.5 Reasoning - 복합 추론"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "0a3ce609",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🧠 Reasoning QA 생성 중...\n",
- "\n",
- "✅ 3개의 Reasoning QA 생성 완료\n",
- "\n",
- "📌 [REASON_001] Reasoning\n",
- " Q: 30세 남성이 20년납 월보험료로 가입했을 때, `table_1`의 '기준보험료' 증가율이 '월보험료'에 동일하게 적용된다고 가정하면, 가입 후 3년이 지난 시점(즉, 33세)의 월보험료는 얼마가 될까요?\n",
- " A: 약 94,658원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_4`에서 30세 남성의 20년납 월보험료를 확인합니다: 45,000원.\n",
- " Step 2: Step 2: `table_1`에서 'XX세'를 30세로 가정하고, 'XX세' (30세)의 기준보험료(C)는 42,325원, 'XX+3세' (33세)의 기준보험료(C)는 89,030원임을 확인합니다.\n",
- " Step 3: Step 3: 30세 대비 33세의 기준보험료 증가율을 계산합니다: (89,030 / 42,325).\n",
- " Step 4: Step 4: 이 증가율을 30세 남성의 초기 월보험료에 적용하여 33세의 월보험료를 계산합니다: 45,000원 * (89,030 / 42,325) = 94,657.76...\n",
- " Step 5: Step 5: 계산된 값을 반올림하여 최종 월보험료를 도출합니다.\n",
- "\n",
- "📌 [REASON_002] Reasoning\n",
- " Q: 3년 동안 보험료를 납입한 계약자가 일반암 진단을 받고, 진단금을 수령한 직후 보험을 해지한다면, 이 계약자가 총 얼마의 금액을 받게 될까요? (단, 납입보험료는 해지환급금 계산에만 영향을 미치며, 진단금 수령 시점까지의 납입보험료는 해지환급금 누계에 포함된다고 가정합니다.)\n",
- " A: 30,540,000원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_3`에서 '일반암' 진단금을 확인합니다: 30,000,000원.\n",
- " Step 2: Step 2: `table_2`에서 '경과기간' 3년 시점의 '해지환급금'을 확인합니다: 540,000원.\n",
- " Step 3: Step 3: 계약자가 받게 될 총 금액은 '일반암 진단금'과 '해지환급금'의 합계입니다.\n",
- " Step 4: Step 4: 총 수령액 = 30,000,000원 + 540,000원 = 30,540,000원.\n",
- "\n",
- "📌 [REASON_003] Reasoning\n",
- " Q: 40세 남성과 40세 여성이 각각 20년납으로 보험에 가입했다고 가정합니다. 두 사람 모두 가입 후 5년이 지난 시점에 일반암 진단을 받고, 10일간 암으로 입원했으며, 1회의 암수술을 받았을 경우, 다음 두 가지 질문에 답하세요.\n",
- "1. 두 사람이 5년간 납입한 총 보험료의 차이는 얼마입니까?\n",
- "2. 두 사람이 받게 되는 총 보장금액의 차이는 얼마입니까?\n",
- " A: 1. 5년간 납입 총 보험료 차이: 420,000원\n",
- "2. 총 보장금액 차이: 0원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_4`에서 40세 남성과 40세 여성의 20년납 월보험료를 확인합니다.\n",
- " Step 2: - 40세 남성: 65,000원\n",
- " Step 3: - 40세 여성: 58,000원\n",
- " Step 4: Step 2: 각 성별로 5년간 납입할 총 보험료를 계산합니다 (월보험료 * 12개월 * 5년).\n",
- " Step 5: - 남성 총 납입 보험료: 65,000원 * 12 * 5 = 3,900,000원\n",
- " Step 6: - 여성 총 납입 보험료: 58,000원 * 12 * 5 = 3,480,000원\n",
- " Step 7: Step 3: 두 사람의 5년간 납입 총 보험료 차이를 계산합니다: 3,900,000원 - 3,480,000원 = 420,000원.\n",
- " Step 8: Step 4: `table_3`에서 일반암 진단금, 암입원비 (10일), 암수술비 (1회)를 확인합니다.\n",
- " Step 9: - 일반암 진단금: 30,000,000원\n",
- " Step 10: - 암입원비: 100,000원 (1일당) * 10일 = 1,000,000원\n",
- " Step 11: - 암수술비: 2,000,000원 (1회당)\n",
- " Step 12: Step 5: 각 사람이 받게 되는 총 보장금액을 계산합니다 (성별에 따른 보장금액 차이는 `table_3`에 명시되어 있지 않으므로 동일하다고 가정).\n",
- " Step 13: - 총 보장금액 = 30,000,000원 + 1,000,000원 + 2,000,000원 = 33,000,000원.\n",
- " Step 14: Step 6: 두 사람의 총 보장금액 차이를 계산합니다: 33,000,000원 - 33,000,000원 = 0원.\n",
- "\n",
- "✅ 3개의 Reasoning QA 생성 완료\n",
- "\n",
- "📌 [REASON_001] Reasoning\n",
- " Q: 30세 남성이 20년납 월보험료로 가입했을 때, `table_1`의 '기준보험료' 증가율이 '월보험료'에 동일하게 적용된다고 가정하면, 가입 후 3년이 지난 시점(즉, 33세)의 월보험료는 얼마가 될까요?\n",
- " A: 약 94,658원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_4`에서 30세 남성의 20년납 월보험료를 확인합니다: 45,000원.\n",
- " Step 2: Step 2: `table_1`에서 'XX세'를 30세로 가정하고, 'XX세' (30세)의 기준보험료(C)는 42,325원, 'XX+3세' (33세)의 기준보험료(C)는 89,030원임을 확인합니다.\n",
- " Step 3: Step 3: 30세 대비 33세의 기준보험료 증가율을 계산합니다: (89,030 / 42,325).\n",
- " Step 4: Step 4: 이 증가율을 30세 남성의 초기 월보험료에 적용하여 33세의 월보험료를 계산합니다: 45,000원 * (89,030 / 42,325) = 94,657.76...\n",
- " Step 5: Step 5: 계산된 값을 반올림하여 최종 월보험료를 도출합니다.\n",
- "\n",
- "📌 [REASON_002] Reasoning\n",
- " Q: 3년 동안 보험료를 납입한 계약자가 일반암 진단을 받고, 진단금을 수령한 직후 보험을 해지한다면, 이 계약자가 총 얼마의 금액을 받게 될까요? (단, 납입보험료는 해지환급금 계산에만 영향을 미치며, 진단금 수령 시점까지의 납입보험료는 해지환급금 누계에 포함된다고 가정합니다.)\n",
- " A: 30,540,000원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_3`에서 '일반암' 진단금을 확인합니다: 30,000,000원.\n",
- " Step 2: Step 2: `table_2`에서 '경과기간' 3년 시점의 '해지환급금'을 확인합니다: 540,000원.\n",
- " Step 3: Step 3: 계약자가 받게 될 총 금액은 '일반암 진단금'과 '해지환급금'의 합계입니다.\n",
- " Step 4: Step 4: 총 수령액 = 30,000,000원 + 540,000원 = 30,540,000원.\n",
- "\n",
- "📌 [REASON_003] Reasoning\n",
- " Q: 40세 남성과 40세 여성이 각각 20년납으로 보험에 가입했다고 가정합니다. 두 사람 모두 가입 후 5년이 지난 시점에 일반암 진단을 받고, 10일간 암으로 입원했으며, 1회의 암수술을 받았을 경우, 다음 두 가지 질문에 답하세요.\n",
- "1. 두 사람이 5년간 납입한 총 보험료의 차이는 얼마입니까?\n",
- "2. 두 사람이 받게 되는 총 보장금액의 차이는 얼마입니까?\n",
- " A: 1. 5년간 납입 총 보험료 차이: 420,000원\n",
- "2. 총 보장금액 차이: 0원\n",
- " Chain of Thought:\n",
- " Step 1: Step 1: `table_4`에서 40세 남성과 40세 여성의 20년납 월보험료를 확인합니다.\n",
- " Step 2: - 40세 남성: 65,000원\n",
- " Step 3: - 40세 여성: 58,000원\n",
- " Step 4: Step 2: 각 성별로 5년간 납입할 총 보험료를 계산합니다 (월보험료 * 12개월 * 5년).\n",
- " Step 5: - 남성 총 납입 보험료: 65,000원 * 12 * 5 = 3,900,000원\n",
- " Step 6: - 여성 총 납입 보험료: 58,000원 * 12 * 5 = 3,480,000원\n",
- " Step 7: Step 3: 두 사람의 5년간 납입 총 보험료 차이를 계산합니다: 3,900,000원 - 3,480,000원 = 420,000원.\n",
- " Step 8: Step 4: `table_3`에서 일반암 진단금, 암입원비 (10일), 암수술비 (1회)를 확인합니다.\n",
- " Step 9: - 일반암 진단금: 30,000,000원\n",
- " Step 10: - 암입원비: 100,000원 (1일당) * 10일 = 1,000,000원\n",
- " Step 11: - 암수술비: 2,000,000원 (1회당)\n",
- " Step 12: Step 5: 각 사람이 받게 되는 총 보장금액을 계산합니다 (성별에 따른 보장금액 차이는 `table_3`에 명시되어 있지 않으므로 동일하다고 가정).\n",
- " Step 13: - 총 보장금액 = 30,000,000원 + 1,000,000원 + 2,000,000원 = 33,000,000원.\n",
- " Step 14: Step 6: 두 사람의 총 보장금액 차이를 계산합니다: 33,000,000원 - 33,000,000원 = 0원.\n"
- ]
- }
- ],
- "source": [
- "# Reasoning 난이도 QA 생성 (Chain-of-Thought)\n",
- "print(\"🧠 Reasoning QA 생성 중...\")\n",
- "\n",
- "reasoning_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.REASONING,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(reasoning_qa_pairs)}개의 Reasoning QA 생성 완료\")\n",
- "for qa in reasoning_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.chain_of_thought:\n",
- " print(f\" Chain of Thought:\")\n",
- " for i, step in enumerate(qa.chain_of_thought, 1):\n",
- " print(f\" Step {i}: {step}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8c33297d",
- "metadata": {},
- "source": [
- "### 4.6 Insight - 통찰 도출"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "72b940c2",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "💡 Insight QA 생성 중...\n",
- "\n",
- "✅ 2개의 Insight QA 생성 완료\n",
- "\n",
- "📌 [INSIGHT_001] Insight\n",
- " Q: 이 보험 상품의 해지환급금 데이터를 분석했을 때, 조기 해지가 고객에게 미치는 재정적 영향은 무엇이며, 이는 상품 설계 철학에 대해 어떤 시사점을 제공합니까?\n",
- " A: 이 보험 상품을 조기에 해지할 경우, 고객은 상당한 재정적 손실을 입게 됩니다. 예를 들어, 1년 경과 후 해지 시 납입보험료 누계의 0%만 환급되며, 3년 후에도 30%만 돌려받습니다. 납입한 원금(환급률 100%)을 온전히 돌려받으려면 20년 만기까지 유지해야 합니다. 이는 이 상품이 장기적인 보장과 유지를 전제로 설계되었으며, 단기 해지에 대해 높은 ...\n",
- "\n",
- "📌 [INSIGHT_002] Insight\n",
- " Q: 보험료 산출 구성 요소와 연령 및 성별에 따른 월 보험료 데이터를 종합적으로 고려할 때, 이 보험 상품이 위험을 평가하고 보험료를 책정하는 방식에 대해 어떤 통찰을 얻을 수 있으며, 특히 연령이 증가함에 따라 위험 평가가 어떻게 변화하는 것으로 보입니까?\n",
- " A: 이 보험 상품은 연령이 증가함에 따라 위험 관련 요소를 보험료 산출에 매우 중요하게 반영하고 있습니다. `table_1`에서 '나이증가분(A)'보다 '보험료 산출 기초율(위험률 등) 증가분(B)'이 훨씬 더 큰 폭으로 보험료를 상승시키는 주요 요인임을 알 수 있습니다. 이는 연령이 높아질수록 질병 발생률이나 사고 위험 등 보험사가 인지하는 위험도가 급격히 ...\n",
- "\n",
- "✅ 2개의 Insight QA 생성 완료\n",
- "\n",
- "📌 [INSIGHT_001] Insight\n",
- " Q: 이 보험 상품의 해지환급금 데이터를 분석했을 때, 조기 해지가 고객에게 미치는 재정적 영향은 무엇이며, 이는 상품 설계 철학에 대해 어떤 시사점을 제공합니까?\n",
- " A: 이 보험 상품을 조기에 해지할 경우, 고객은 상당한 재정적 손실을 입게 됩니다. 예를 들어, 1년 경과 후 해지 시 납입보험료 누계의 0%만 환급되며, 3년 후에도 30%만 돌려받습니다. 납입한 원금(환급률 100%)을 온전히 돌려받으려면 20년 만기까지 유지해야 합니다. 이는 이 상품이 장기적인 보장과 유지를 전제로 설계되었으며, 단기 해지에 대해 높은 ...\n",
- "\n",
- "📌 [INSIGHT_002] Insight\n",
- " Q: 보험료 산출 구성 요소와 연령 및 성별에 따른 월 보험료 데이터를 종합적으로 고려할 때, 이 보험 상품이 위험을 평가하고 보험료를 책정하는 방식에 대해 어떤 통찰을 얻을 수 있으며, 특히 연령이 증가함에 따라 위험 평가가 어떻게 변화하는 것으로 보입니까?\n",
- " A: 이 보험 상품은 연령이 증가함에 따라 위험 관련 요소를 보험료 산출에 매우 중요하게 반영하고 있습니다. `table_1`에서 '나이증가분(A)'보다 '보험료 산출 기초율(위험률 등) 증가분(B)'이 훨씬 더 큰 폭으로 보험료를 상승시키는 주요 요인임을 알 수 있습니다. 이는 연령이 높아질수록 질병 발생률이나 사고 위험 등 보험사가 인지하는 위험도가 급격히 ...\n"
- ]
- }
- ],
- "source": [
- "# Insight 난이도 QA 생성 (서술형)\n",
- "print(\"💡 Insight QA 생성 중...\")\n",
- "\n",
- "insight_qa_pairs = qa_generator.generate_qa_by_difficulty(\n",
- " tables=tables,\n",
- " difficulty=QADifficulty.INSIGHT,\n",
- " num_questions=2\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(insight_qa_pairs)}개의 Insight QA 생성 완료\")\n",
- "for qa in insight_qa_pairs:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer[:200]}...\" if len(qa.answer) > 200 else f\" A: {qa.answer}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "15f40c4d",
- "metadata": {},
- "source": [
- "## 5. Multi-Table QA 생성\n",
- "\n",
- "복수의 테이블을 참조해야 답변 가능한 질문을 생성합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "2dbbbad2",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🔗 Multi-Table QA 생성 중...\n",
- "\n",
- "✅ 3개의 Multi-Table QA 생성 완료\n",
- "\n",
- "📌 [MULTI_001] Reasoning\n",
- " Q: table_2에서 10년 경과 시점의 납입보험료 누계가 6,000,000원일 때, 만약 이 보험이 20년납 상품이었다면 월 평균 보험료는 얼마였을까요? 그리고 이 월 평균 보험료는 table_4의 30세 남성 20년납 월보험료(45,000원)와 비교했을 때 얼마나 차이가 나나요?\n",
- " A: table_2의 10년 경과 시점 납입보험료 누계(6,000,000원)를 기준으로 20년납 상품의 월 평균 보험료를 계산하면 50,000원입니다. 이는 table_4의 30세 남성 20년납 월보험료(45,000원)보다 5,000원 더 높은 금액입니다.\n",
- "\n",
- "📌 [MULTI_002] Calculation\n",
- " Q: table_4에서 40세 여성이 전기납 월보험료 상품에 가입했다고 할 때, 이 보험료가 table_1의 'XX세' 기준보험료(C)에 해당한다고 가정합니다. 이 경우, 'XX+2세' 시점의 기준보험료는 얼마로 예상할 수 있을까요? (단, table_1의 나이증가분(A)와 보험료 산출 기초율 증가분(B)는 table_1의 해당 값을 따릅니다.)\n",
- " A: table_4의 40세 여성 전기납 월보험료 47,000원을 'XX세' 기준보험료로 가정하면, 'XX+1세' 시점의 기준보험료는 47,000 + 1,059 + 10,846 = 58,905원입니다. 이어서 'XX+2세' 시점의 기준보험료는 58,905 + 1,357 + 13,897 = 74,159원입니다.\n",
- "\n",
- "📌 [MULTI_003] Reasoning\n",
- " Q: 40세 남성이 20년납 월보험료 상품에 가입하여 5년 동안 보험료를 납입했을 경우, 총 납입한 보험료는 얼마인가요? 만약 이 시점에 일반암 진단을 받았다면, 총 납입 보험료 대비 암진단금은 몇 배에 해당하나요? 또한, 만약 이 보험의 해지환급금이 table_2의 '5년 경과' 시점의 납입보험료 누계(3,000,000원)에 해당하는 해지환급금과 동일하다고 가정할 때, 암진단금은 해지환급금보다 얼마나 더 많은 금액인가요?\n",
- " A: 40세 남성이 5년 동안 납입한 총 보험료는 3,900,000원입니다. 이 시점에 일반암 진단을 받았다면, 총 납입 보험료 대비 암진단금(30,000,000원)은 약 7.69배에 해당합니다. 또한, table_2의 5년 경과 시점 해지환급금(1,650,000원)과 비교했을 때, 암진단금은 해지환급금보다 28,350,000원 더 많은 금액입니다.\n",
- "\n",
- "✅ 3개의 Multi-Table QA 생성 완료\n",
- "\n",
- "📌 [MULTI_001] Reasoning\n",
- " Q: table_2에서 10년 경과 시점의 납입보험료 누계가 6,000,000원일 때, 만약 이 보험이 20년납 상품이었다면 월 평균 보험료는 얼마였을까요? 그리고 이 월 평균 보험료는 table_4의 30세 남성 20년납 월보험료(45,000원)와 비교했을 때 얼마나 차이가 나나요?\n",
- " A: table_2의 10년 경과 시점 납입보험료 누계(6,000,000원)를 기준으로 20년납 상품의 월 평균 보험료를 계산하면 50,000원입니다. 이는 table_4의 30세 남성 20년납 월보험료(45,000원)보다 5,000원 더 높은 금액입니다.\n",
- "\n",
- "📌 [MULTI_002] Calculation\n",
- " Q: table_4에서 40세 여성이 전기납 월보험료 상품에 가입했다고 할 때, 이 보험료가 table_1의 'XX세' 기준보험료(C)에 해당한다고 가정합니다. 이 경우, 'XX+2세' 시점의 기준보험료는 얼마로 예상할 수 있을까요? (단, table_1의 나이증가분(A)와 보험료 산출 기초율 증가분(B)는 table_1의 해당 값을 따릅니다.)\n",
- " A: table_4의 40세 여성 전기납 월보험료 47,000원을 'XX세' 기준보험료로 가정하면, 'XX+1세' 시점의 기준보험료는 47,000 + 1,059 + 10,846 = 58,905원입니다. 이어서 'XX+2세' 시점의 기준보험료는 58,905 + 1,357 + 13,897 = 74,159원입니다.\n",
- "\n",
- "📌 [MULTI_003] Reasoning\n",
- " Q: 40세 남성이 20년납 월보험료 상품에 가입하여 5년 동안 보험료를 납입했을 경우, 총 납입한 보험료는 얼마인가요? 만약 이 시점에 일반암 진단을 받았다면, 총 납입 보험료 대비 암진단금은 몇 배에 해당하나요? 또한, 만약 이 보험의 해지환급금이 table_2의 '5년 경과' 시점의 납입보험료 누계(3,000,000원)에 해당하는 해지환급금과 동일하다고 가정할 때, 암진단금은 해지환급금보다 얼마나 더 많은 금액인가요?\n",
- " A: 40세 남성이 5년 동안 납입한 총 보험료는 3,900,000원입니다. 이 시점에 일반암 진단을 받았다면, 총 납입 보험료 대비 암진단금(30,000,000원)은 약 7.69배에 해당합니다. 또한, table_2의 5년 경과 시점 해지환급금(1,650,000원)과 비교했을 때, 암진단금은 해지환급금보다 28,350,000원 더 많은 금액입니다.\n"
- ]
- }
- ],
- "source": [
- "# Multi-Table QA 생성\n",
- "print(\"🔗 Multi-Table QA 생성 중...\")\n",
- "\n",
- "multi_table_qa = qa_generator.generate_multi_table_qa(\n",
- " tables=tables,\n",
- " num_questions=3\n",
- ")\n",
- "\n",
- "print(f\"\\n✅ {len(multi_table_qa)}개의 Multi-Table QA 생성 완료\")\n",
- "for qa in multi_table_qa:\n",
- " print(f\"\\n📌 [{qa.id}] {qa.difficulty}\")\n",
- " print(f\" Q: {qa.question}\")\n",
- " print(f\" A: {qa.answer}\")\n",
- " if qa.evidence and 'required_tables' in str(qa.evidence):\n",
- " print(f\" 참조 테이블: {qa.evidence}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "cc4df731",
- "metadata": {},
- "source": [
- "## 6. 꼬리 질문 (Follow-up) 생성\n",
- "\n",
- "특정 QA에 대해 연속적인 후속 질문 체인을 생성합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "d2c2b8a1",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🔄 Follow-up QA 생성 중...\n",
- "\n",
- "원본 QA:\n",
- " Q: XX세의 기준보험료는 얼마인가요?\n",
- " A: 42325\n",
- "\n",
- "✅ 3개의 Follow-up QA 생성 완료\n",
- "\n",
- " 🔹 Follow-up 1:\n",
- " Q: XX세에서 XX+1세로 나이가 증가할 때, 기준보험료는 얼마나 증가하나요?\n",
- " A: XX세의 기준보험료는 42325원이고, XX+1세의 기준보험료는 54321원이므로, 54321 - 42325 = 11996원 증가합니다.\n",
- "\n",
- " 🔹 Follow-up 2:\n",
- " Q: 이 11996원의 기준보험료 증가분은 주로 '나이증가분(A)'과 '보험료 산출 기초율(위험률 등) 증가분(B)' 중 어떤 요인에 의해 발생한 것인가요?\n",
- " A: XX+1세의 '나이증가분(A)'은 1059원이고, '보험료 산출 기초율(위험률 등) 증가분(B)'은 10846원입니다. 두 요인 중 '보험료 산출 기초율(위험률 등) 증가분(B)'이 훨씬 크므로, 주로 이 요인에 의해 기준보험료가 증가했습니다.\n",
- "\n",
- " 🔹 Follow-up 3:\n",
- " Q: '보험료 산출 기초율(위험률 등) 증가분(B)'이 '전년도 기준보험료의 최대 25% 가정'이라는 설명에 따르면, XX+1세의 B값(10846원)은 XX세의 기준보험료(42325원)의 25%를 초과하나요?\n",
- " A: XX세의 기준보험료 42325원의 25%는 42325 * 0.25 = 10581.25원입니다. XX+1세의 '보험료 산출 기초율 증가분(B)'은 10846원이므로, 10581.25원을 초과합니다.\n",
- "\n",
- "✅ 3개의 Follow-up QA 생성 완료\n",
- "\n",
- " 🔹 Follow-up 1:\n",
- " Q: XX세에서 XX+1세로 나이가 증가할 때, 기준보험료는 얼마나 증가하나요?\n",
- " A: XX세의 기준보험료는 42325원이고, XX+1세의 기준보험료는 54321원이므로, 54321 - 42325 = 11996원 증가합니다.\n",
- "\n",
- " 🔹 Follow-up 2:\n",
- " Q: 이 11996원의 기준보험료 증가분은 주로 '나이증가분(A)'과 '보험료 산출 기초율(위험률 등) 증가분(B)' 중 어떤 요인에 의해 발생한 것인가요?\n",
- " A: XX+1세의 '나이증가분(A)'은 1059원이고, '보험료 산출 기초율(위험률 등) 증가분(B)'은 10846원입니다. 두 요인 중 '보험료 산출 기초율(위험률 등) 증가분(B)'이 훨씬 크므로, 주로 이 요인에 의해 기준보험료가 증가했습니다.\n",
- "\n",
- " 🔹 Follow-up 3:\n",
- " Q: '보험료 산출 기초율(위험률 등) 증가분(B)'이 '전년도 기준보험료의 최대 25% 가정'이라는 설명에 따르면, XX+1세의 B값(10846원)은 XX세의 기준보험료(42325원)의 25%를 초과하나요?\n",
- " A: XX세의 기준보험료 42325원의 25%는 42325 * 0.25 = 10581.25원입니다. XX+1세의 '보험료 산출 기초율 증가분(B)'은 10846원이므로, 10581.25원을 초과합니다.\n"
- ]
- }
- ],
- "source": [
- "# 꼬리 질문 생성 (IR QA 기반)\n",
- "if ir_qa_pairs:\n",
- " print(\"🔄 Follow-up QA 생성 중...\")\n",
- " \n",
- " original_qa = ir_qa_pairs[0]\n",
- " print(f\"\\n원본 QA:\")\n",
- " print(f\" Q: {original_qa.question}\")\n",
- " print(f\" A: {original_qa.answer}\")\n",
- " \n",
- " followup_result = qa_generator.generate_followup_qa(\n",
- " tables=tables,\n",
- " original_qa=original_qa\n",
- " )\n",
- " \n",
- " if followup_result and 'followup_chain' in followup_result:\n",
- " print(f\"\\n✅ {len(followup_result['followup_chain'])}개의 Follow-up QA 생성 완료\")\n",
- " for i, followup in enumerate(followup_result['followup_chain'], 1):\n",
- " print(f\"\\n 🔹 Follow-up {i}:\")\n",
- " print(f\" Q: {followup.get('question', 'N/A')}\")\n",
- " print(f\" A: {followup.get('answer', 'N/A')}\")\n",
- " else:\n",
- " print(f\"\\n결과: {followup_result}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c1b07346",
- "metadata": {},
- "source": [
- "## 7. Evol-Instruct: 질문 난이도 진화\n",
- "\n",
- "기본 질문을 더 복잡한 질문으로 진화시킵니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "id": "1ea8a3b0",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🧬 Evol-Instruct 적용 중...\n",
- "\n",
- "원본 질문: 40세 남성의 20년납 월보험료는 얼마인가?\n",
- "\n",
- "✅ 진화 완료\n",
- " 진화 전략: 제약 조건 추가 (Adding Constraints), 심층 추론 (Deepening Reasoning), 입력 복잡도 증가 (Complicating Input)\n",
- " 진화된 질문: 40세 남성이 20년납으로 보험에 가입할 경우 월보험료는 65,000원이다. 만약 이 남성이 10년 뒤인 50세에 동일한 조건으로 가입한다면 월보험료는 얼마로 예상되는가? 그리고 table_1_premium_calculation 데이터를 참조하여 나이가 증가함에 따라 보험료가 상승하는 주요 원인 두 가지를 설명하시오.\n",
- " 새로운 난이도: Reasoning (Level 5)\n",
- " 답변: 50세에 동일한 조건으로 가입한다면 월보험료는 95,000원으로 예상됩니다. 보험료가 나이가 증가함에 따라 상승하는 주요 원인은 '나이증가분(A)'과 '보험료 산출 기초율(위험률 등) 증가분(B)'입니다. 이 두 가지 요소가 전년도 기준보험료에 더해져 새로운 기준보험료(C)를 형성하기 때문입니다.\n",
- "\n",
- "✅ 진화 완료\n",
- " 진화 전략: 제약 조건 추가 (Adding Constraints), 심층 추론 (Deepening Reasoning), 입력 복잡도 증가 (Complicating Input)\n",
- " 진화된 질문: 40세 남성이 20년납으로 보험에 가입할 경우 월보험료는 65,000원이다. 만약 이 남성이 10년 뒤인 50세에 동일한 조건으로 가입한다면 월보험료는 얼마로 예상되는가? 그리고 table_1_premium_calculation 데이터를 참조하여 나이가 증가함에 따라 보험료가 상승하는 주요 원인 두 가지를 설명하시오.\n",
- " 새로운 난이도: Reasoning (Level 5)\n",
- " 답변: 50세에 동일한 조건으로 가입한다면 월보험료는 95,000원으로 예상됩니다. 보험료가 나이가 증가함에 따라 상승하는 주요 원인은 '나이증가분(A)'과 '보험료 산출 기초율(위험률 등) 증가분(B)'입니다. 이 두 가지 요소가 전년도 기준보험료에 더해져 새로운 기준보험료(C)를 형성하기 때문입니다.\n"
- ]
- }
- ],
- "source": [
- "# Evol-Instruct 적용\n",
- "print(\"🧬 Evol-Instruct 적용 중...\")\n",
- "\n",
- "# 간단한 질문에서 시작\n",
- "simple_question = \"40세 남성의 20년납 월보험료는 얼마인가?\"\n",
- "print(f\"\\n원본 질문: {simple_question}\")\n",
- "\n",
- "evolved_result = qa_generator.evolve_question(\n",
- " tables=tables,\n",
- " original_question=simple_question\n",
- ")\n",
- "\n",
- "if evolved_result and 'evolved' in evolved_result:\n",
- " evolved = evolved_result['evolved']\n",
- " print(f\"\\n✅ 진화 완료\")\n",
- " print(f\" 진화 전략: {evolved.get('evolution_strategy', 'N/A')}\")\n",
- " print(f\" 진화된 질문: {evolved.get('question', 'N/A')}\")\n",
- " print(f\" 새로운 난이도: {evolved.get('difficulty', 'N/A')}\")\n",
- " print(f\" 답변: {evolved.get('answer', 'N/A')}\")\n",
- "else:\n",
- " print(f\"\\n결과: {evolved_result}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4ca79e89",
- "metadata": {},
- "source": [
- "## 8. LLM-as-Judge: 품질 평가"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "id": "287c2e34",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "⚖️ QA 품질 평가 중...\n",
- "\n",
- "평가 대상 QA:\n",
- " Q: XX세의 기준보험료는 얼마인가요?\n",
- " A: 42325\n",
- "\n",
- "✅ 평가 완료\n",
- " 📊 Overall Score: 5.0/5.0\n",
- " ✓ Pass: 예\n",
- "\n",
- " 세부 점수:\n",
- " - 정확성: 5\n",
- " - 충실성: 5\n",
- " - 관련성: 5\n",
- " - 난이도 적절성: 5\n",
- " - 명확성: 5\n",
- "\n",
- "✅ 평가 완료\n",
- " 📊 Overall Score: 5.0/5.0\n",
- " ✓ Pass: 예\n",
- "\n",
- " 세부 점수:\n",
- " - 정확성: 5\n",
- " - 충실성: 5\n",
- " - 관련성: 5\n",
- " - 난이도 적절성: 5\n",
- " - 명확성: 5\n"
- ]
- }
- ],
- "source": [
- "# QA 품질 평가\n",
- "if ir_qa_pairs:\n",
- " print(\"⚖️ QA 품질 평가 중...\")\n",
- " \n",
- " qa_to_evaluate = ir_qa_pairs[0]\n",
- " print(f\"\\n평가 대상 QA:\")\n",
- " print(f\" Q: {qa_to_evaluate.question}\")\n",
- " print(f\" A: {qa_to_evaluate.answer}\")\n",
- " \n",
- " evaluation = qa_generator.evaluate_qa(\n",
- " tables=tables,\n",
- " qa_pair=qa_to_evaluate\n",
- " )\n",
- " \n",
- " if evaluation:\n",
- " print(f\"\\n✅ 평가 완료\")\n",
- " print(f\" 📊 Overall Score: {evaluation.overall_score}/5.0\")\n",
- " print(f\" ✓ Pass: {'예' if evaluation.passed else '아니오'}\")\n",
- " print(f\"\\n 세부 점수:\")\n",
- " print(f\" - 정확성: {evaluation.correctness.get('score', 'N/A')}\")\n",
- " print(f\" - 충실성: {evaluation.faithfulness.get('score', 'N/A')}\")\n",
- " print(f\" - 관련성: {evaluation.relevance.get('score', 'N/A')}\")\n",
- " print(f\" - 난이도 적절성: {evaluation.difficulty_appropriateness.get('score', 'N/A')}\")\n",
- " print(f\" - 명확성: {evaluation.clarity.get('score', 'N/A')}\")\n",
- " \n",
- " if evaluation.improvement_suggestions:\n",
- " print(f\"\\n 💡 개선 제안:\")\n",
- " for suggestion in evaluation.improvement_suggestions:\n",
- " print(f\" - {suggestion}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5dcbcf93",
- "metadata": {},
- "source": [
- "## 9. 종합 QA 데이터셋 생성\n",
- "\n",
- "모든 난이도의 QA를 한 번에 생성하고 데이터셋으로 저장합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "342690ff",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 종합 QA 데이터셋 생성\n",
- "print(\"📦 종합 QA 데이터셋 생성 중...\")\n",
- "print(\" (모든 난이도 + Follow-up + Evol-Instruct)\")\n",
- "print(\" ⏳ 약 2-3분 소요될 수 있습니다...\\n\")\n",
- "\n",
- "comprehensive_dataset = qa_generator.generate_comprehensive_qa_dataset(\n",
- " tables=tables,\n",
- " questions_per_difficulty=2,\n",
- " include_followup=True,\n",
- " include_evolution=True,\n",
- " evaluate_quality=False # 평가는 시간이 오래 걸려서 선택적으로\n",
- ")\n",
- "\n",
- "print(\"\\n\" + \"=\"*60)\n",
- "print(\"📊 데이터셋 생성 결과\")\n",
- "print(\"=\"*60)\n",
- "print(f\" 총 QA 쌍: {comprehensive_dataset['metadata'].get('total_qa_pairs', 0)}개\")\n",
- "print(f\" Follow-up 체인: {comprehensive_dataset['metadata'].get('total_followups', 0)}개\")\n",
- "print(f\" 진화된 질문: {comprehensive_dataset['metadata'].get('total_evolved', 0)}개\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c24832a3",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 생성된 QA 미리보기\n",
- "print(\"\\n📋 생성된 QA 샘플 (난이도별 1개씩):\")\n",
- "print(\"=\"*60)\n",
- "\n",
- "shown_difficulties = set()\n",
- "for qa in comprehensive_dataset['qa_pairs']:\n",
- " difficulty = qa.get('difficulty', 'Unknown')\n",
- " if difficulty not in shown_difficulties:\n",
- " shown_difficulties.add(difficulty)\n",
- " print(f\"\\n🔹 [{difficulty}]\")\n",
- " print(f\" Q: {qa.get('question', 'N/A')}\")\n",
- " answer = qa.get('answer', 'N/A')\n",
- " if len(str(answer)) > 150:\n",
- " print(f\" A: {str(answer)[:150]}...\")\n",
- " else:\n",
- " print(f\" A: {answer}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "48fda52a",
- "metadata": {},
- "source": [
- "## 10. 데이터셋 저장"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7a74e734",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 출력 디렉토리 생성\n",
- "output_dir = Path.cwd() / \"output\"\n",
- "output_dir.mkdir(exist_ok=True)\n",
- "\n",
- "# 타임스탬프 생성\n",
- "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
- "\n",
- "# JSON 파일로 저장\n",
- "output_file = output_dir / f\"insurance_qa_dataset_{timestamp}.json\"\n",
- "\n",
- "with open(output_file, 'w', encoding='utf-8') as f:\n",
- " json.dump(comprehensive_dataset, f, ensure_ascii=False, indent=2)\n",
- "\n",
- "print(f\"✅ 데이터셋 저장 완료: {output_file}\")\n",
- "print(f\" 파일 크기: {output_file.stat().st_size / 1024:.1f} KB\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2669b33d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 간단한 통계 출력용 함수\n",
- "def print_dataset_statistics(dataset: dict):\n",
- " \"\"\"데이터셋 통계 출력\"\"\"\n",
- " print(\"\\n\" + \"=\"*60)\n",
- " print(\"📈 QA 데이터셋 통계\")\n",
- " print(\"=\"*60)\n",
- " \n",
- " # 난이도별 카운트\n",
- " difficulty_counts = {}\n",
- " answer_type_counts = {}\n",
- " \n",
- " for qa in dataset.get('qa_pairs', []):\n",
- " diff = qa.get('difficulty', 'Unknown')\n",
- " atype = qa.get('answer_type', 'Unknown')\n",
- " \n",
- " difficulty_counts[diff] = difficulty_counts.get(diff, 0) + 1\n",
- " answer_type_counts[atype] = answer_type_counts.get(atype, 0) + 1\n",
- " \n",
- " print(\"\\n📊 난이도별 분포:\")\n",
- " for diff, count in sorted(difficulty_counts.items()):\n",
- " bar = \"█\" * count\n",
- " print(f\" {diff:15} | {bar} ({count})\")\n",
- " \n",
- " print(\"\\n📊 답변 유형별 분포:\")\n",
- " for atype, count in sorted(answer_type_counts.items()):\n",
- " bar = \"█\" * count\n",
- " print(f\" {atype:15} | {bar} ({count})\")\n",
- " \n",
- " print(f\"\\n📊 총계:\")\n",
- " print(f\" 총 QA 쌍: {len(dataset.get('qa_pairs', []))}개\")\n",
- " print(f\" Follow-up 체인: {len(dataset.get('followup_chains', []))}개\")\n",
- " print(f\" 진화된 질문: {len(dataset.get('evolved_questions', []))}개\")\n",
- "\n",
- "# 통계 출력\n",
- "print_dataset_statistics(comprehensive_dataset)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a8e41aa7",
- "metadata": {},
- "source": [
- "## 11. 비동기 대량 생성 (선택사항)\n",
- "\n",
- "더 빠른 생성을 위해 비동기 방식을 사용할 수 있습니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "05cf2fa2",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 비동기 QA 생성 (선택사항)\n",
- "async def generate_async_dataset():\n",
- " \"\"\"비동기 방식으로 QA 데이터셋 생성\"\"\"\n",
- " print(\"🚀 비동기 QA 데이터셋 생성 중...\")\n",
- " \n",
- " dataset = await qa_generator.agenerate_comprehensive_qa_dataset(\n",
- " tables=tables,\n",
- " questions_per_difficulty=2\n",
- " )\n",
- " \n",
- " return dataset\n",
- "\n",
- "# Jupyter에서 비동기 실행\n",
- "# async_dataset = await generate_async_dataset()\n",
- "# print(f\"비동기 생성 완료: {len(async_dataset['qa_pairs'])}개 QA\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3e43bab8",
- "metadata": {},
- "source": [
- "## 12. 커스텀 프롬프트로 특화 QA 생성\n",
- "\n",
- "특정 요구사항에 맞는 커스텀 QA를 생성합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4b0fbe57",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 커스텀 프롬프트 예시: 수치 계산 중심 QA\n",
- "CUSTOM_CALCULATION_PROMPT = \"\"\"\n",
- "## Task: 수치 계산 중심 QA 생성\n",
- "보험 테이블에서 수치 계산이 필요한 질문을 생성하세요.\n",
- "\n",
- "### Input Tables\n",
- "{tables}\n",
- "\n",
- "### Requirements\n",
- "1. 반드시 수치 계산(사칙연산, 비율, 증감률 등)이 필요한 질문\n",
- "2. 답변은 정확한 숫자로 제공\n",
- "3. 계산 과정(Python 코드) 포함\n",
- "4. 검증 가능한 형태로 출력\n",
- "\n",
- "### Output Format (JSON)\n",
- "```json\n",
- "{{\n",
- " \"questions\": [\n",
- " {{\n",
- " \"id\": \"CALC_001\",\n",
- " \"difficulty\": \"Aggregation\",\n",
- " \"answer_type\": \"calculation\",\n",
- " \"question\": \"질문\",\n",
- " \"answer\": \"수치 답변\",\n",
- " \"calculation_steps\": [\"단계1\", \"단계2\"],\n",
- " \"python_code\": \"검증용 Python 코드\"\n",
- " }}\n",
- " ]\n",
- "}}\n",
- "```\n",
- "\n",
- "### Generate 3 calculation-focused QA pairs.\n",
- "\"\"\"\n",
- "\n",
- "# 커스텀 프롬프트로 생성\n",
- "from QA_example.prompts import format_tables_for_prompt\n",
- "\n",
- "formatted_tables = format_tables_for_prompt(tables)\n",
- "custom_prompt = CUSTOM_CALCULATION_PROMPT.format(tables=formatted_tables)\n",
- "\n",
- "print(\"🔢 수치 계산 중심 QA 생성 중...\")\n",
- "response = qa_generator.pool.generate_content(custom_prompt)\n",
- "\n",
- "# 결과 파싱\n",
- "result = qa_generator._parse_json_response(response)\n",
- "\n",
- "if 'questions' in result:\n",
- " print(f\"\\n✅ {len(result['questions'])}개의 계산 QA 생성 완료\")\n",
- " for qa in result['questions']:\n",
- " print(f\"\\n📌 [{qa.get('id', 'N/A')}]\")\n",
- " print(f\" Q: {qa.get('question', 'N/A')}\")\n",
- " print(f\" A: {qa.get('answer', 'N/A')}\")\n",
- " if 'python_code' in qa:\n",
- " print(f\" Python Code: 포함됨 ✅\")\n",
- "else:\n",
- " print(f\"결과: {result}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "484134d3",
- "metadata": {},
- "source": [
- "## 13. 요약 및 결론\n",
- "\n",
- "### 생성된 QA 데이터셋의 특징\n",
- "\n",
- "| 특징 | 설명 | 커버되는 양상 |\n",
- "|------|------|-------------|\n",
- "| 난이도 다양성 | IR부터 Insight까지 6단계 | #2 |\n",
- "| Multi-Table | 복수 테이블 참조 필요 | #1 |\n",
- "| 답변 유형 | Exact Match, Descriptive, Calculation | #3 |\n",
- "| 수치 계산 | 집계, 비교, 증감률 계산 | #4, #6 |\n",
- "| 꼬리 질문 | Follow-up 체인 생성 | #5 |\n",
- "| 특정 셀 QA | 단일 셀 기반 Q-A | #7 |\n",
- "| Evol-Instruct | 질문 난이도 진화 | #2, #4 |\n",
- "| LLM Judge | 품질 평가 | #3 |"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/tests/choi/QA_example/qa_generator.py b/tests/choi/QA_example/qa_generator.py
deleted file mode 100644
index b1ab1fe..0000000
--- a/tests/choi/QA_example/qa_generator.py
+++ /dev/null
@@ -1,556 +0,0 @@
-"""
-QA Generator for Insurance Table Data
-보험 테이블 기반 QA 데이터셋 생성기
-
-이 모듈은 Gemini API를 활용하여 보험 테이블 마크다운 데이터로부터
-다양한 난이도와 유형의 QA 데이터셋을 생성합니다.
-
-주요 기능:
-1. 난이도별 QA 생성 (IR, Analysis, Compare, Aggregation, Reasoning, Insight)
-2. Multi-table QA 생성
-3. 꼬리 질문 (Follow-up) 생성
-4. Evol-Instruct 기반 난이도 진화
-5. LLM-as-Judge 품질 평가
-"""
-
-import json
-import asyncio
-import logging
-from pathlib import Path
-from typing import Optional, Dict, Any, List, Union
-from dataclasses import dataclass, asdict
-from enum import Enum
-import sys
-from json_repair import repair_json
-
-# 프로젝트 루트 추가
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from polling_gemini import get_gemini_pool, GeminiAPIPool
-
-from .prompts import (
- QA_GENERATOR_SYSTEM_PROMPT,
- IR_QA_PROMPT,
- ANALYSIS_QA_PROMPT,
- COMPARE_QA_PROMPT,
- AGGREGATION_QA_PROMPT,
- REASONING_QA_PROMPT,
- INSIGHT_QA_PROMPT,
- FOLLOWUP_QA_PROMPT,
- MULTI_TABLE_QA_PROMPT,
- EVOL_INSTRUCT_PROMPT,
- QA_EVALUATION_PROMPT,
- get_qa_prompt_by_difficulty,
- format_tables_for_prompt,
-)
-
-# 로깅 설정
-logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
-
-class QADifficulty(Enum):
- """QA 난이도 레벨"""
- IR = "IR" # Level 1: 단순 정보 검색
- ANALYSIS = "Analysis" # Level 2: 분석적 질문
- COMPARE = "Compare" # Level 3: 비교/Multi-hop
- AGGREGATION = "Aggregation" # Level 4: 집계 연산
- REASONING = "Reasoning" # Level 5: 복합 추론
- INSIGHT = "Insight" # Level 6: 통찰 도출
-
-
-class QAType(Enum):
- """QA 답변 유형"""
- EXACT_MATCH = "exact_match" # 단답형 (정확히 일치)
- DESCRIPTIVE = "descriptive" # 서술형 (LLM Judge 평가)
- CALCULATION = "calculation" # 계산형 (수치 결과)
- COMPARISON = "comparison" # 비교형 (비교 결과 및 근거)
-
-
-@dataclass
-class QAPair:
- """QA 쌍 데이터 클래스"""
- id: str
- difficulty: str
- answer_type: str
- question: str
- answer: str
- reasoning: Optional[str] = None
- evidence: Optional[Dict[str, Any]] = None
- tags: Optional[List[str]] = None
- python_verification: Optional[str] = None
- chain_of_thought: Optional[List[str]] = None
- # 추가 필드들 (LLM이 생성할 수 있는 다양한 필드)
- calculation: Optional[str] = None
- calculation_steps: Optional[List[str]] = None
- assumptions: Optional[List[str]] = None
- supporting_analysis: Optional[str] = None
- key_findings: Optional[List[str]] = None
- required_tables: Optional[List[str]] = None
- join_logic: Optional[str] = None
- python_code: Optional[str] = None
- extra: Optional[Dict[str, Any]] = None # 기타 알 수 없는 필드용
-
- def to_dict(self) -> Dict[str, Any]:
- """딕셔너리로 변환"""
- return {k: v for k, v in asdict(self).items() if v is not None}
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> 'QAPair':
- """딕셔너리에서 QAPair 생성 (알 수 없는 필드는 extra에 저장)"""
- # QAPair의 필드 목록
- known_fields = {
- 'id', 'difficulty', 'answer_type', 'question', 'answer',
- 'reasoning', 'evidence', 'tags', 'python_verification',
- 'chain_of_thought', 'calculation', 'calculation_steps',
- 'assumptions', 'supporting_analysis', 'key_findings',
- 'required_tables', 'join_logic', 'python_code', 'extra'
- }
-
- # 알려진 필드와 알 수 없는 필드 분리
- known_data = {}
- extra_data = {}
-
- for key, value in data.items():
- if key in known_fields:
- known_data[key] = value
- else:
- extra_data[key] = value
-
- # 필수 필드 기본값 설정
- known_data.setdefault('id', 'UNKNOWN')
- known_data.setdefault('difficulty', 'Unknown')
- known_data.setdefault('answer_type', 'unknown')
- known_data.setdefault('question', '')
- known_data.setdefault('answer', '')
-
- # extra 필드가 있으면 추가
- if extra_data:
- known_data['extra'] = extra_data
-
- return cls(**known_data)
-
-
-@dataclass
-class EvaluationResult:
- """QA 평가 결과 데이터 클래스"""
- correctness: Dict[str, Any]
- faithfulness: Dict[str, Any]
- relevance: Dict[str, Any]
- difficulty_appropriateness: Dict[str, Any]
- clarity: Dict[str, Any]
- overall_score: float
- passed: bool
- improvement_suggestions: List[str]
-
-
-class InsuranceTableQAGenerator:
- """
- 보험 테이블 기반 QA 데이터셋 생성기
-
- Gemini API Pool을 활용하여 자동 키 로테이션을 지원하며,
- 다양한 난이도와 유형의 QA를 생성합니다.
- """
-
- def __init__(
- self,
- config_path: Optional[str] = None,
- model_name: str = "gemini-2.0-flash",
- ):
- """
- Args:
- config_path: API 키 설정 파일 경로
- model_name: 사용할 Gemini 모델
- """
- self.pool = get_gemini_pool(config_path)
- self.model_name = model_name
- self.system_prompt = QA_GENERATOR_SYSTEM_PROMPT
-
- def _parse_json_response(self, response: str) -> Dict[str, Any]:
- """LLM 응답에서 JSON 추출 및 파싱 (agentjson 사용)"""
-
- try:
- result = repair_json(response, return_objects=True)
-
- if isinstance(result, dict):
- logger.debug(f"JSON 파싱 성공")
- return result
- elif isinstance(result, list):
- logger.warning(f"JSON 파싱 결과가 리스트입니다. 딕셔너리로 변환을 시도합니다.")
- # 질문 리스트라고 가정
- return {"questions": result}
- else:
- logger.warning(f"JSON 파싱 실패 (예상치 못한 타입): {type(result)}")
- return {"error": "parsing_failed", "raw_response": response}
-
- except Exception as e:
- logger.error(f"JSON 파싱 실패: {e}")
- logger.debug(f"원본 응답: {response}")
- return {"error": str(e), "raw_response": response}
-
- def _build_prompt(
- self,
- base_prompt: str,
- tables: Dict[str, str],
- **kwargs
- ) -> str:
- """프롬프트 구성"""
- formatted_tables = format_tables_for_prompt(tables)
-
- prompt = f"{self.system_prompt}\n\n{base_prompt}"
- prompt = prompt.format(tables=formatted_tables, **kwargs)
-
- return prompt
-
- def generate_qa_by_difficulty(
- self,
- tables: Dict[str, str],
- difficulty: QADifficulty,
- num_questions: int = 3,
- **kwargs
- ) -> List[QAPair]:
- """
- 특정 난이도의 QA 생성
-
- Args:
- tables: 테이블 딕셔너리 {table_id: markdown_content}
- difficulty: QA 난이도
- num_questions: 생성할 질문 수
-
- Returns:
- 생성된 QA 쌍 리스트
- """
- base_prompt = get_qa_prompt_by_difficulty(difficulty.value)
- prompt = self._build_prompt(
- base_prompt,
- tables,
- num_questions=num_questions
- )
-
- try:
- response = self.pool.generate_content(prompt)
- result = self._parse_json_response(response)
-
- if "questions" in result:
- return [QAPair.from_dict(q) for q in result["questions"]]
- else:
- logger.warning(f"예상치 못한 응답 형식: {result}")
- return []
-
- except Exception as e:
- logger.error(f"QA 생성 실패: {e}")
- return []
-
- async def agenerate_qa_by_difficulty(
- self,
- tables: Dict[str, str],
- difficulty: QADifficulty,
- num_questions: int = 3,
- **kwargs
- ) -> List[QAPair]:
- """난이도별 QA 생성 (비동기)"""
- base_prompt = get_qa_prompt_by_difficulty(difficulty.value)
- prompt = self._build_prompt(
- base_prompt,
- tables,
- num_questions=num_questions
- )
-
- try:
- response = await self.pool.agenerate_content(prompt)
- result = self._parse_json_response(response)
-
- if "questions" in result:
- return [QAPair.from_dict(q) for q in result["questions"]]
- else:
- return []
-
- except Exception as e:
- logger.error(f"비동기 QA 생성 실패: {e}")
- return []
-
- def generate_multi_table_qa(
- self,
- tables: Dict[str, str],
- num_questions: int = 3,
- ) -> List[QAPair]:
- """
- Multi-table QA 생성
-
- 복수의 테이블을 참조해야 답변 가능한 질문 생성
- """
- if len(tables) < 2:
- logger.warning("Multi-table QA는 최소 2개의 테이블이 필요합니다.")
- # 단일 테이블이라도 시도
-
- prompt = self._build_prompt(
- MULTI_TABLE_QA_PROMPT,
- tables,
- num_questions=num_questions
- )
-
- try:
- response = self.pool.generate_content(prompt)
- result = self._parse_json_response(response)
-
- if "questions" in result:
- return [QAPair.from_dict(q) for q in result["questions"]]
- return []
-
- except Exception as e:
- logger.error(f"Multi-table QA 생성 실패: {e}")
- return []
-
- def generate_followup_qa(
- self,
- tables: Dict[str, str],
- original_qa: QAPair,
- ) -> Dict[str, Any]:
- """
- 꼬리 질문 (Follow-up) 생성
-
- 원래 QA를 기반으로 연속적인 후속 질문 체인 생성
- """
- original_qa_str = json.dumps(original_qa.to_dict(), ensure_ascii=False, indent=2)
-
- prompt = self._build_prompt(
- FOLLOWUP_QA_PROMPT,
- tables,
- original_qa=original_qa_str
- )
-
- try:
- response = self.pool.generate_content(prompt)
- return self._parse_json_response(response)
-
- except Exception as e:
- logger.error(f"Follow-up QA 생성 실패: {e}")
- return {}
-
- def evolve_question(
- self,
- tables: Dict[str, str],
- original_question: str,
- ) -> Dict[str, Any]:
- """
- Evol-Instruct: 질문 난이도 진화
-
- 기본 질문을 더 복잡한 질문으로 진화
- """
- prompt = self._build_prompt(
- EVOL_INSTRUCT_PROMPT,
- tables,
- original_question=original_question
- )
-
- try:
- response = self.pool.generate_content(prompt)
- return self._parse_json_response(response)
-
- except Exception as e:
- logger.error(f"Evol-Instruct 실패: {e}")
- return {}
-
- def evaluate_qa(
- self,
- tables: Dict[str, str],
- qa_pair: QAPair,
- ) -> EvaluationResult:
- """
- LLM-as-Judge: QA 품질 평가
-
- 생성된 QA 쌍의 품질을 다면적으로 평가
- """
- qa_str = json.dumps(qa_pair.to_dict(), ensure_ascii=False, indent=2)
-
- prompt = self._build_prompt(
- QA_EVALUATION_PROMPT,
- tables,
- qa_pair=qa_str
- )
-
- try:
- response = self.pool.generate_content(prompt)
- result = self._parse_json_response(response)
-
- if "evaluation" in result:
- eval_data = result["evaluation"]
- return EvaluationResult(
- correctness=eval_data.get("correctness", {}),
- faithfulness=eval_data.get("faithfulness", {}),
- relevance=eval_data.get("relevance", {}),
- difficulty_appropriateness=eval_data.get("difficulty_appropriateness", {}),
- clarity=eval_data.get("clarity", {}),
- overall_score=result.get("overall_score", 0.0),
- passed=result.get("pass", False),
- improvement_suggestions=result.get("improvement_suggestions", [])
- )
- else:
- logger.warning(f"평가 결과 파싱 실패: {result}")
- return None
-
- except Exception as e:
- logger.error(f"QA 평가 실패: {e}")
- return None
-
- def generate_comprehensive_qa_dataset(
- self,
- tables: Dict[str, str],
- questions_per_difficulty: int = 2,
- include_followup: bool = True,
- include_evolution: bool = True,
- evaluate_quality: bool = False,
- ) -> Dict[str, Any]:
- """
- 종합적인 QA 데이터셋 생성
-
- 모든 난이도의 QA를 생성하고 선택적으로 꼬리질문, 진화, 평가를 수행
-
- Args:
- tables: 테이블 딕셔너리
- questions_per_difficulty: 난이도별 질문 수
- include_followup: 꼬리질문 포함 여부
- include_evolution: Evol-Instruct 포함 여부
- evaluate_quality: 품질 평가 수행 여부
-
- Returns:
- 종합 QA 데이터셋
- """
- dataset = {
- "metadata": {
- "tables_count": len(tables),
- "questions_per_difficulty": questions_per_difficulty,
- },
- "qa_pairs": [],
- "followup_chains": [],
- "evolved_questions": [],
- "evaluations": [],
- }
-
- # 1. 각 난이도별 QA 생성
- for difficulty in QADifficulty:
- logger.info(f"Generating {difficulty.value} level QA...")
- qa_pairs = self.generate_qa_by_difficulty(
- tables, difficulty, questions_per_difficulty
- )
-
- for qa in qa_pairs:
- qa_dict = qa.to_dict()
- dataset["qa_pairs"].append(qa_dict)
-
- # 2. 꼬리질문 생성 (선택적)
- if include_followup and difficulty in [QADifficulty.IR, QADifficulty.ANALYSIS]:
- followup = self.generate_followup_qa(tables, qa)
- if followup:
- dataset["followup_chains"].append(followup)
-
- # 3. 질문 진화 (선택적)
- if include_evolution and difficulty in [QADifficulty.IR, QADifficulty.ANALYSIS]:
- evolved = self.evolve_question(tables, qa.question)
- if evolved:
- dataset["evolved_questions"].append(evolved)
-
- # 4. 품질 평가 (선택적)
- if evaluate_quality:
- evaluation = self.evaluate_qa(tables, qa)
- if evaluation:
- dataset["evaluations"].append({
- "qa_id": qa.id,
- "evaluation": asdict(evaluation)
- })
-
- # 5. Multi-table QA (테이블이 2개 이상인 경우)
- if len(tables) >= 2:
- logger.info("Generating Multi-table QA...")
- multi_qa = self.generate_multi_table_qa(tables, questions_per_difficulty)
- for qa in multi_qa:
- dataset["qa_pairs"].append(qa.to_dict())
-
- # 메타데이터 업데이트
- dataset["metadata"]["total_qa_pairs"] = len(dataset["qa_pairs"])
- dataset["metadata"]["total_followups"] = len(dataset["followup_chains"])
- dataset["metadata"]["total_evolved"] = len(dataset["evolved_questions"])
-
- return dataset
-
- async def agenerate_comprehensive_qa_dataset(
- self,
- tables: Dict[str, str],
- questions_per_difficulty: int = 2,
- ) -> Dict[str, Any]:
- """종합 QA 데이터셋 생성 (비동기)"""
- dataset = {
- "metadata": {
- "tables_count": len(tables),
- "questions_per_difficulty": questions_per_difficulty,
- },
- "qa_pairs": [],
- }
-
- # 모든 난이도에 대해 병렬로 QA 생성
- tasks = [
- self.agenerate_qa_by_difficulty(tables, difficulty, questions_per_difficulty)
- for difficulty in QADifficulty
- ]
-
- results = await asyncio.gather(*tasks)
-
- for qa_list in results:
- for qa in qa_list:
- dataset["qa_pairs"].append(qa.to_dict())
-
- dataset["metadata"]["total_qa_pairs"] = len(dataset["qa_pairs"])
-
- return dataset
-
-
-# =============================================================================
-# Convenience Functions
-# =============================================================================
-
-def generate_qa_from_tables(
- tables: Dict[str, str],
- difficulty: Optional[QADifficulty] = None,
- num_questions: int = 3,
- config_path: Optional[str] = None,
-) -> List[Dict[str, Any]]:
- """
- 테이블에서 QA 생성 (간편 함수)
-
- Args:
- tables: 테이블 딕셔너리 {table_id: markdown_content}
- difficulty: 난이도 (None이면 모든 난이도)
- num_questions: 난이도별 질문 수
- config_path: API 설정 파일 경로
-
- Returns:
- 생성된 QA 리스트
- """
- generator = InsuranceTableQAGenerator(config_path=config_path)
-
- if difficulty:
- qa_pairs = generator.generate_qa_by_difficulty(tables, difficulty, num_questions)
- return [qa.to_dict() for qa in qa_pairs]
- else:
- dataset = generator.generate_comprehensive_qa_dataset(
- tables,
- questions_per_difficulty=num_questions,
- include_followup=False,
- include_evolution=False,
- evaluate_quality=False,
- )
- return dataset["qa_pairs"]
-
-
-async def agenerate_qa_from_tables(
- tables: Dict[str, str],
- num_questions: int = 3,
- config_path: Optional[str] = None,
-) -> Dict[str, Any]:
- """테이블에서 QA 생성 (비동기 간편 함수)"""
- generator = InsuranceTableQAGenerator(config_path=config_path)
- return await generator.agenerate_comprehensive_qa_dataset(tables, num_questions)
diff --git a/tests/choi/Table_example/README.md b/tests/choi/Table_example/README.md
deleted file mode 100644
index f0cc165..0000000
--- a/tests/choi/Table_example/README.md
+++ /dev/null
@@ -1,168 +0,0 @@
-# Table Example - 보험 테이블 추출기
-
-`polling_gemini` 패키지를 활용하여 보험 문서 이미지에서 테이블을 Markdown 형식으로 추출하는 예제입니다.
-
-## 주요 특징
-
-- **VLM + OCR 하이브리드 접근**: Gemini의 시각적 추론 능력과 OCR 텍스트를 결합
-- **보험 도메인 특화 프롬프트**: 계층적 헤더, 셀 병합, 복합 데이터 처리에 최적화
-- **자동 API 키 로테이션**: `polling_gemini` 패키지를 통한 API 키 관리
-- **동기/비동기 지원**: 대용량 처리를 위한 비동기 API 지원
-
-## 설치
-
-프로젝트 루트에서:
-
-```bash
-uv sync
-```
-
-## 사용법
-
-### 1. 기본 사용
-
-```python
-from Table_example import extract_table_from_image
-
-# 이미지에서 테이블 추출
-result = extract_table_from_image("insurance_table.png")
-print(result)
-```
-
-### 2. OCR 참조 텍스트와 함께 사용
-
-```python
-from Table_example import extract_table_from_image
-
-# OCR로 먼저 추출한 텍스트가 있는 경우
-ocr_text = """| 구분 | 보험기간 | 납입기간 |
-| 상해사망 | 80세 | 20년 |"""
-
-result = extract_table_from_image(
- "insurance_table.png",
- ocr_markdown=ocr_text # OCR 결과를 참조로 제공
-)
-```
-
-### 3. 비동기 처리
-
-```python
-import asyncio
-from Table_example import aextract_table_from_image
-
-async def process_multiple_images():
- images = ["table1.png", "table2.png", "table3.png"]
-
- # 동시에 여러 이미지 처리
- tasks = [aextract_table_from_image(img) for img in images]
- results = await asyncio.gather(*tasks)
-
- return results
-
-results = asyncio.run(process_multiple_images())
-```
-
-### 4. InsuranceTableExtractor 클래스 직접 사용
-
-```python
-from Table_example import InsuranceTableExtractor
-
-# 추출기 인스턴스 생성
-extractor = InsuranceTableExtractor(
- config_path="apis/gemini_keys.yaml", # 커스텀 설정 경로
- model_name="gemini-2.5-flash" # 모델 지정
-)
-
-# 테이블 추출
-result = extractor.extract("insurance_table.png")
-
-# API Pool 상태 확인
-status = extractor.get_pool_status()
-print(f"현재 API 키: {status['current_key']['name']}")
-```
-
-## 프롬프트 설계
-
-### System Prompt 핵심 원칙
-
-1. **구조적 완전성**: 병합된 셀을 비정규화하여 모든 행에 값 채우기
-2. **헤더 평탄화**: 다중 행 헤더를 언더스코어(_)로 연결하여 단일 행으로 변환
-3. **데이터 무결성**: 금액 포맷 정리, 퍼센트 유지, 부가 정보 제거
-4. **Hybrid Reference**: OCR 텍스트 참조로 오타 교정
-
-### User Prompt Chain-of-Table 단계
-
-1. **구조 분석**: 레이아웃, 헤더 행 수, 병합 셀 식별
-2. **헤더 처리**: 다중 행 헤더 → 단일 행 변환
-3. **데이터 추출**: 값 추출 및 병합 셀 반복 입력
-4. **포맷팅 검증**: 금액 정리, 컬럼 개수 확인
-
-## 테스트
-
-```bash
-cd TableMagnifier
-python -m Table_example.test_extraction
-```
-
-### 테스트 이미지 준비
-
-`Table_example/sample_images/` 폴더에 테스트할 보험 테이블 이미지를 추가하세요:
-- 지원 형식: PNG, JPG, JPEG, GIF, WebP, BMP
-- 권장: 해상도가 높은 테이블 이미지
-
-## 출력 예시
-
-입력 이미지:
-```
-┌─────────────┬────────────────────────────┐
-│ │ 해지환급금 │
-│ 구분 ├──────────────┬─────────────┤
-│ │ 금액 │ 환급률 │
-├─────────────┼──────────────┼─────────────┤
-│ 1년 │ 100,000원 │ 10% │
-│ 5년 │ 500,000원 │ 50% │
-│ 10년 │ 1,000,000원 │ 100% │
-└─────────────┴──────────────┴─────────────┘
-```
-
-출력 Markdown:
-```markdown
-| 구분 | 해지환급금_금액 | 해지환급금_환급률 |
-| :--- | :--- | :--- |
-| 1년 | 100000 | 10% |
-| 5년 | 500000 | 50% |
-| 10년 | 1000000 | 100% |
-```
-
-## 파일 구조
-
-```
-Table_example/
-├── __init__.py # 패키지 초기화
-├── prompts.py # 시스템/사용자 프롬프트 정의
-├── table_extractor.py # 테이블 추출기 클래스
-├── test_extraction.py # 테스트 코드
-├── sample_images/ # 테스트 이미지 폴더
-└── README.md # 이 문서
-```
-
-## API 키 설정
-
-`apis/gemini_keys.yaml` 파일에 Gemini API 키를 설정하세요:
-
-```yaml
-api_keys:
- - key: "YOUR_GEMINI_API_KEY"
- name: "key1"
- enabled: true
-
-settings:
- model: "gemini-2.5-flash"
- temperature: 0.1
- max_retries: 3
- retry_delay: 2
-```
-
-## 라이센스
-
-MIT License
diff --git a/tests/choi/Table_example/__init__.py b/tests/choi/Table_example/__init__.py
deleted file mode 100644
index 30e122d..0000000
--- a/tests/choi/Table_example/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-Table Extraction Example
-보험 도메인 특화 테이블 추출 예제
-polling_gemini 패키지를 사용하여 이미지에서 Markdown 테이블 추출
-"""
-
-from .table_extractor import (
- InsuranceTableExtractor,
- extract_table_from_image,
- aextract_table_from_image,
-)
-from .prompts import SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
-
-__all__ = [
- 'InsuranceTableExtractor',
- 'extract_table_from_image',
- 'aextract_table_from_image',
- 'SYSTEM_PROMPT',
- 'USER_PROMPT_TEMPLATE',
-]
diff --git a/tests/choi/Table_example/example_batch.py b/tests/choi/Table_example/example_batch.py
deleted file mode 100644
index 2f348ff..0000000
--- a/tests/choi/Table_example/example_batch.py
+++ /dev/null
@@ -1,167 +0,0 @@
-"""
-비동기 배치 테이블 추출 예제
-여러 이미지를 동시에 처리하여 효율적으로 테이블을 추출합니다.
-"""
-
-import sys
-import asyncio
-import time
-from pathlib import Path
-from typing import List, Tuple, Optional
-
-# 프로젝트 루트를 Python 경로에 추가
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from Table_example import aextract_table_from_image, InsuranceTableExtractor
-
-
-async def process_single_image(
- image_path: Path,
- ocr_markdown: str = "N/A"
-) -> Tuple[Path, Optional[str], Optional[str]]:
- """
- 단일 이미지 처리 (비동기)
-
- Returns:
- (이미지 경로, 결과 또는 None, 에러 메시지 또는 None)
- """
- try:
- result = await aextract_table_from_image(
- image_path=image_path,
- ocr_markdown=ocr_markdown
- )
- return (image_path, result, None)
- except Exception as e:
- return (image_path, None, str(e))
-
-
-async def batch_extract_tables(
- image_paths: List[Path],
- max_concurrent: int = 3
-) -> List[Tuple[Path, Optional[str], Optional[str]]]:
- """
- 여러 이미지를 배치로 처리 (동시성 제한)
-
- Args:
- image_paths: 처리할 이미지 경로 리스트
- max_concurrent: 동시 처리 최대 개수
-
- Returns:
- (이미지 경로, 결과, 에러) 튜플 리스트
- """
- # 세마포어로 동시 실행 제한
- semaphore = asyncio.Semaphore(max_concurrent)
-
- async def process_with_semaphore(image_path: Path):
- async with semaphore:
- return await process_single_image(image_path)
-
- # 모든 이미지 동시 처리 (세마포어로 제한)
- tasks = [process_with_semaphore(path) for path in image_paths]
- results = await asyncio.gather(*tasks)
-
- return results
-
-
-def find_all_images(directory: Path) -> List[Path]:
- """디렉토리에서 모든 이미지 파일 찾기"""
- extensions = ["*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp"]
- images = []
- for ext in extensions:
- images.extend(directory.glob(ext))
- return sorted(images)
-
-
-async def main():
- """
- 배치 처리 예제
- sample_images 폴더의 모든 이미지를 처리합니다.
- """
- print("🏥 보험 테이블 배치 추출 시작")
- print("=" * 70)
-
- # ============================================
- # 📌 설정
- # ============================================
- sample_dir = Path(__file__).parent / "sample_images"
- output_dir = Path(__file__).parent / "output"
- max_concurrent = 3 # 동시 처리 최대 개수
-
- # 출력 디렉토리 생성
- output_dir.mkdir(exist_ok=True)
-
- # ============================================
- # 📌 이미지 파일 수집
- # ============================================
- image_paths = find_all_images(sample_dir)
-
- if not image_paths:
- print(f"❌ 이미지 파일이 없습니다: {sample_dir}")
- print("\n📝 sample_images 폴더에 테이블 이미지를 추가하세요.")
- return
-
- print(f"📁 처리할 이미지: {len(image_paths)}개")
- for i, path in enumerate(image_paths, 1):
- print(f" {i}. {path.name}")
- print(f"⚡ 동시 처리 수: {max_concurrent}")
- print("=" * 70)
-
- # ============================================
- # 🚀 배치 처리 실행
- # ============================================
- start_time = time.time()
-
- results = await batch_extract_tables(
- image_paths=image_paths,
- max_concurrent=max_concurrent
- )
-
- elapsed = time.time() - start_time
-
- # ============================================
- # 📊 결과 처리
- # ============================================
- success_count = 0
- failed_count = 0
-
- print("\n📊 처리 결과:")
- print("-" * 70)
-
- for image_path, result, error in results:
- if result:
- success_count += 1
- status = "✅ 성공"
-
- # 결과 파일 저장
- output_path = output_dir / f"{image_path.stem}_table.md"
- with open(output_path, "w", encoding="utf-8") as f:
- f.write(f"# {image_path.name}\n\n")
- f.write(result)
-
- print(f"\n{status}: {image_path.name}")
- print(f" 💾 저장: {output_path.name}")
- print(f" 📝 미리보기: {result[:100]}...")
- else:
- failed_count += 1
- status = "❌ 실패"
- print(f"\n{status}: {image_path.name}")
- print(f" ⚠️ 에러: {error}")
-
- # ============================================
- # 📈 최종 요약
- # ============================================
- print("\n" + "=" * 70)
- print("📈 처리 완료!")
- print(f" ⏱️ 총 소요 시간: {elapsed:.2f}초")
- print(f" ✅ 성공: {success_count}개")
- print(f" ❌ 실패: {failed_count}개")
- print(f" 📁 출력 폴더: {output_dir}")
-
- if success_count > 0:
- avg_time = elapsed / success_count
- print(f" ⚡ 평균 처리 시간: {avg_time:.2f}초/이미지")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/choi/Table_example/example_simple.py b/tests/choi/Table_example/example_simple.py
deleted file mode 100644
index c0bdbbc..0000000
--- a/tests/choi/Table_example/example_simple.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-간단한 테이블 추출 예제
-실제 이미지 파일 경로를 지정하여 테이블을 추출합니다.
-"""
-
-import sys
-from pathlib import Path
-
-# 프로젝트 루트를 Python 경로에 추가
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from Table_example import extract_table_from_image, InsuranceTableExtractor
-
-
-def main():
- """
- 사용 예제:
- 1. sample_images 폴더에 테이블 이미지를 추가하세요
- 2. 아래 IMAGE_PATH를 해당 이미지 경로로 수정하세요
- 3. 스크립트를 실행하세요
- """
-
- # ============================================
- # 📌 이미지 경로 설정
- # ============================================
- # 방법 1: sample_images 폴더의 이미지 사용
- sample_dir = Path(__file__).parent / "sample_images"
-
- # sample_images 폴더의 첫 번째 이미지 자동 선택
- image_files = list(sample_dir.glob("*.png")) + \
- list(sample_dir.glob("*.jpg")) + \
- list(sample_dir.glob("*.jpeg"))
-
- if not image_files:
- print("❌ sample_images 폴더에 이미지가 없습니다.")
- print(f" 경로: {sample_dir}")
- print("\n📝 테이블 이미지를 추가한 후 다시 실행하세요.")
- return
-
- IMAGE_PATH = image_files[0]
-
- # 방법 2: 직접 경로 지정 (주석 해제하여 사용)
- # IMAGE_PATH = "/path/to/your/table_image.png"
-
- # ============================================
- # 📌 OCR 참조 텍스트 (선택적)
- # ============================================
- # OCR로 먼저 추출한 텍스트가 있으면 여기에 입력
- # 이미지가 흐릿할 때 숫자 정확도 향상에 도움됩니다
- OCR_MARKDOWN = "N/A" # 없으면 "N/A"
-
- # 예시:
- # OCR_MARKDOWN = """
- # | 구분 | 보험료 |
- # | 상해 | 10000 |
- # | 질병 | 15000 |
- # """
-
- # ============================================
- # 🚀 테이블 추출 실행
- # ============================================
- print("🏥 보험 테이블 추출 시작")
- print("=" * 60)
- print(f"📁 이미지: {IMAGE_PATH}")
- print(f"📝 OCR 참조: {'있음' if OCR_MARKDOWN != 'N/A' else '없음'}")
- print("=" * 60)
-
- try:
- # 테이블 추출
- result = extract_table_from_image(
- image_path=IMAGE_PATH,
- ocr_markdown=OCR_MARKDOWN
- )
-
- print("\n✅ 추출 완료!")
- print("\n📊 결과 (Markdown Table):")
- print("-" * 60)
- print(result)
- print("-" * 60)
-
- # 결과를 파일로 저장
- output_path = Path(IMAGE_PATH).with_suffix(".md")
- with open(output_path, "w", encoding="utf-8") as f:
- f.write(result)
- print(f"\n💾 결과 저장됨: {output_path}")
-
- except FileNotFoundError as e:
- print(f"❌ 파일을 찾을 수 없습니다: {e}")
- except Exception as e:
- print(f"❌ 추출 실패: {e}")
- import traceback
- traceback.print_exc()
-
-
-if __name__ == "__main__":
- main()
diff --git a/tests/choi/Table_example/output/.gitkeep b/tests/choi/Table_example/output/.gitkeep
deleted file mode 100644
index bc83f8d..0000000
--- a/tests/choi/Table_example/output/.gitkeep
+++ /dev/null
@@ -1,3 +0,0 @@
-# Output
-
-이 폴더에 추출된 테이블 결과가 저장됩니다.
diff --git a/tests/choi/Table_example/prompts.py b/tests/choi/Table_example/prompts.py
deleted file mode 100644
index e928665..0000000
--- a/tests/choi/Table_example/prompts.py
+++ /dev/null
@@ -1,69 +0,0 @@
-
-# System Prompt - 보험 데이터 엔지니어 역할 정의
-SYSTEM_PROMPT = """# Role Definition
-당신은 20년 경력의 '수석 보험 데이터 엔지니어'이자 'OCR 후처리 전문가'입니다.
-당신의 임무는 제공된 [보험 문서 이미지]와 선택적으로 제공되는 [기초 OCR 텍스트]를 분석하여, 데이터베이스 적재가 가능한 완벽한 형태의 'Standardized Markdown Table'로 변환하는 것입니다.
-
-# Core Principles
-1. **구조적 완전성(Structural Integrity):** 시각적으로 병합(Merge)된 셀은 반드시 비정규화(Denormalization)하여 모든 행에 값을 채워야 합니다. 빈칸이나 " 상동" 등의 표현은 금지됩니다.
-2. **헤더 평탄화(Header Flattening):** 2행 이상의 계층적 헤더(Multi-row Headers)는 상위 헤더와 하위 헤더를 언더스코어(_)로 연결하여 단일 행(Single-row) 헤더로 변환합니다. (예: '보장내용' 하위에 '지급금액'이 있다면 -> '보장내용_지급금액')
-3. **데이터 무결성(Data Integrity):**
- - 금액의 천 단위 구분자(,)는 제거합니다. (예: 1,000,000 -> 1000000)
- - 퍼센트(%)는 기호를 포함한 문자열로 유지합니다. (예: 98.5%)
- - 괄호 안의 보조 정보(예: 전년 대비 증감액)는 무시하고 핵심 수치만 추출합니다.
-4. **Hybrid Reference:** [기초 OCR 텍스트]가 제공될 경우, 이미지 내 텍스트가 흐릿하거나 불분명할 때 해당 텍스트를 참조하여 오타를 교정하십시오. 단, 표의 구조(행/열 위치) 판단은 반드시 [원본 이미지]를 기준으로 합니다."""
-
-# User Prompt Template - 단계별 지시사항 포함
-USER_PROMPT_TEMPLATE = """# Task Description
-아래 제공된 입력을 바탕으로 보험 테이블 데이터를 추출하십시오.
-
-# Input Data
-1. **Target Image:** [첨부된 이미지]
-2. **Reference OCR Markdown (Optional):**
-\"\"\"
-{ocr_markdown}
-\"\"\"
-
-# Step-by-Step Instructions (Chain-of-Table)
-단계별로 생각하고 실행하십시오:
-
-**Step 1. 구조 분석 (Structure Analysis)**
-- 이미지를 보고 표의 전체적인 레이아웃을 파악하십시오.
-- 헤더가 몇 개의 행(Row)으로 구성되어 있는지 확인하십시오.
-- 세로로 병합된(Vertically Merged) '구분'이나 '기간' 컬럼이 있는지 식별하십시오.
-
-**Step 2. 헤더 처리 (Header Processing)**
-- 다중 행 헤더를 단일 행 키(Unique Key)로 변환하십시오.
-- 예:
- | 구분 | 해지환급금 |
- | | 금액 | 환급률 |
- -> | 구분 | 해지환급금_금액 | 해지환급금_환급률 |
-
-**Step 3. 데이터 추출 및 채우기 (Extraction & Filling)**
-- 각 행(Row)의 데이터를 추출하십시오.
-- **중요:** 병합된 셀은 해당 범위에 속하는 모든 행에 동일한 값을 반복 입력(Repeat Value)하십시오. 절대 빈 칸으로 두지 마십시오.
-- OCR 참고용 텍스트가 있다면, 숫자의 정확성을 검증하는 데 사용하십시오.
-
-**Step 4. 포맷팅 및 검증 (Formatting & Verification)**
-- 금액에서 '원', ',' 제거 / 정수형 변환.
-- 출력 전, 헤더의 컬럼 개수와 데이터 행의 컬럼 개수가 일치하는지 확인하십시오.
-
-# Output Format
-설명이나 사족 없이 오직 **Markdown Table** 만 출력하십시오.
-
-| 헤더1 | 헤더2_서브1 | 헤더2_서브2 | ... |
-| :--- | :--- | :--- | ... |
-| 값1 | 값2 | 값3 | ... |"""
-
-
-def get_user_prompt(ocr_markdown: str = "N/A") -> str:
- """
- OCR 마크다운 데이터를 포함한 사용자 프롬프트 생성
-
- Args:
- ocr_markdown: OCR로 추출된 마크다운 텍스트 (없으면 "N/A")
-
- Returns:
- 완성된 사용자 프롬프트 문자열
- """
- return USER_PROMPT_TEMPLATE.format(ocr_markdown=ocr_markdown)
diff --git a/tests/choi/Table_example/sample_images/.gitkeep b/tests/choi/Table_example/sample_images/.gitkeep
deleted file mode 100644
index eede5be..0000000
--- a/tests/choi/Table_example/sample_images/.gitkeep
+++ /dev/null
@@ -1,15 +0,0 @@
-# Sample Images
-
-이 폴더에 테스트할 보험 테이블 이미지를 추가하세요.
-
-## 지원 형식
-- PNG
-- JPG / JPEG
-- GIF
-- WebP
-- BMP
-
-## 권장 이미지
-- 해상도가 높은 테이블 이미지
-- 보험 약관, 보험료표, 해지환급금표 등
-- 계층적 헤더가 있는 복잡한 테이블
diff --git a/tests/choi/Table_example/sample_images/I_table_78.md b/tests/choi/Table_example/sample_images/I_table_78.md
deleted file mode 100644
index 727197b..0000000
--- a/tests/choi/Table_example/sample_images/I_table_78.md
+++ /dev/null
@@ -1,6 +0,0 @@
-|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|
-|---|---|---|---|---|---|---|
-|나이증가분(A)||1,059|1,357|1,739|2,229|2,855|
-|보험료 산출 기초율
(위험률 등) 증가분
(B=전년도
기준보험료의 최대
25% 가정)||10,846|13,897|17,806|22,815|29,232|
-|기준보험료
(C=전년도
기준보험료+A+B)|42,325|54,321|69,485|89,030|114,074|146,161|
-
diff --git a/tests/choi/Table_example/sample_images/I_table_78.png b/tests/choi/Table_example/sample_images/I_table_78.png
deleted file mode 100644
index 3237899..0000000
Binary files a/tests/choi/Table_example/sample_images/I_table_78.png and /dev/null differ
diff --git a/tests/choi/Table_example/sample_images/qa_output/I_table_78_pair_0_qa.json b/tests/choi/Table_example/sample_images/qa_output/I_table_78_pair_0_qa.json
deleted file mode 100644
index f4d70a8..0000000
--- a/tests/choi/Table_example/sample_images/qa_output/I_table_78_pair_0_qa.json
+++ /dev/null
@@ -1,80 +0,0 @@
-{
- "name": "I_table_78_pair_0",
- "image_paths": [
- "tests/choi/Table_example/sample_images/I_table_78.png"
- ],
- "qa_results": [
- {
- "question": "XX+2세의 기준보험료(C)는 얼마입니까?",
- "answer": "69,485",
- "type": "lookup",
- "reasoning_annotation": "Directly retrieve the value from the '기준보험료 (C)' row and 'XX+2세' column.",
- "context": null
- },
- {
- "question": "기준보험료(C)가 70,000 미만인 연령대는 어디입니까?",
- "answer": "XX세, XX+1세, XX+2세",
- "type": "filter",
- "reasoning_annotation": "Filter the '기준보험료 (C)' row for values less than 70,000 and list the corresponding age categories.",
- "context": null
- },
- {
- "question": "XX+1세부터 XX+5세까지의 나이증가분(A)의 총합은 얼마입니까?",
- "answer": "9,239",
- "type": "aggregate",
- "reasoning_annotation": "Sum the values in the '나이증가분(A)' row for age categories XX+1세 (1,059), XX+2세 (1,357), XX+3세 (1,739), XX+4세 (2,229), and XX+5세 (2,855). (1,059 + 1,357 + 1,739 + 2,229 + 2,855 = 9,239).",
- "context": null
- },
- {
- "question": "XX+4세와 XX+5세 중 나이증가분(A)이 더 높은 연령대는 어디입니까?",
- "answer": "XX+5세",
- "type": "compare",
- "reasoning_annotation": "Compare the '나이증가분(A)' values for XX+4세 (2,229) and XX+5세 (2,855). XX+5세 has a higher value.",
- "context": null
- },
- {
- "question": "XX+5세의 기준보험료(C)와 XX세의 기준보험료(C)의 차이는 얼마입니까?",
- "answer": "103,836",
- "type": "arithmetic",
- "reasoning_annotation": "Subtract the '기준보험료 (C)' value for XX세 (42,325) from the '기준보험료 (C)' value for XX+5세 (146,161). (146,161 - 42,325 = 103,836).",
- "context": null
- },
- {
- "question": "XX세부터 XX+5세까지 기준보험료(C)의 전반적인 추세는 어떻습니까?",
- "answer": "연령이 증가함에 따라 기준보험료(C)가 지속적으로 증가하는 상승 추세를 보입니다.",
- "type": "temporal",
- "reasoning_annotation": "Observe the values in the '기준보험료 (C)' row across all age categories (42,325, 54,321, 69,485, 89,030, 114,074, 146,161). All values show a consistent increase.",
- "context": null
- },
- {
- "question": "나이증가분(A)이 1,739인 연령대의 기준보험료(C)는 얼마입니까?",
- "answer": "89,030",
- "type": "multi_hop",
- "reasoning_annotation": "First, find the age category where '나이증가분(A)' is 1,739, which is 'XX+3세'. Then, retrieve the '기준보험료 (C)' value for 'XX+3세'.",
- "context": null
- },
- {
- "question": "위험률 등 기초율 변화로 인한 추가 비용이 가장 높은 연령대는 어디입니까?",
- "answer": "XX+5세",
- "type": "implicit_reference",
- "reasoning_annotation": "The question implicitly refers to '보험료 산출 기초율 (위험률 등) 증가분 (B)'. Find the maximum value in this row, which is 29,232 for 'XX+5세'.",
- "context": null
- },
- {
- "question": "XX+4세 바로 다음 연령대의 기준보험료는 얼마입니까?",
- "answer": "146,161",
- "type": "ellipsis",
- "reasoning_annotation": "The question implicitly asks for the '기준보험료 (C)' of the age category immediately following 'XX+4세', which is 'XX+5세'. Retrieve the value from the '기준보험료 (C)' row and 'XX+5세' column.",
- "context": null
- },
- {
- "question": "정부의 보험료 안정화 방안에 따라, 보험료 인상률을 재조정해야 하는 연령대는 어디입니까?",
- "answer": "XX+4세, XX+5세",
- "type": "long_sequence",
- "reasoning_annotation": "According to the provided context, identify age categories where '보험료 산출 기초율 증가분(B)' exceeds 20,000. These are XX+4세 (22,815) and XX+5세 (29,232).",
- "context": "정부의 보험료 안정화 방안에 따라, '보험료 산출 기초율 증가분(B)'이 20,000을 초과하는 연령대에 대해서는 보험료 인상률을 재조정해야 한다."
- }
- ],
- "token_usage": 8086,
- "errors": []
-}
\ No newline at end of file
diff --git a/tests/choi/Table_example/sample_images/qa_output/_summary.json b/tests/choi/Table_example/sample_images/qa_output/_summary.json
deleted file mode 100644
index 1d6400d..0000000
--- a/tests/choi/Table_example/sample_images/qa_output/_summary.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "folder": "tests/choi/Table_example/sample_images",
- "total": 1,
- "success": 1,
- "failed": 0,
- "qa_only": true,
- "provider": "gemini_pool",
- "model": "gpt-4o-mini",
- "checkpointing_enabled": false,
- "checkpoint_dir": null,
- "results": [
- {
- "name": "I_table_78_pair_0",
- "status": "success",
- "qa_count": 10,
- "output_file": "tests/choi/Table_example/sample_images/qa_output/I_table_78_pair_0_qa.json",
- "errors": []
- }
- ]
-}
\ No newline at end of file
diff --git a/tests/choi/Table_example/table_extractor.py b/tests/choi/Table_example/table_extractor.py
deleted file mode 100644
index 4024079..0000000
--- a/tests/choi/Table_example/table_extractor.py
+++ /dev/null
@@ -1,295 +0,0 @@
-"""
-보험 테이블 추출기
-polling_gemini 패키지를 사용하여 이미지에서 테이블을 Markdown으로 변환
-"""
-
-import os
-import asyncio
-import base64
-import logging
-from pathlib import Path
-from typing import Optional, Dict, Any, Union
-import google.generativeai as genai
-from google.api_core import exceptions as google_exceptions
-
-from .prompts import SYSTEM_PROMPT, get_user_prompt
-
-# polling_gemini 패키지에서 API Pool 가져오기
-import sys
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from polling_gemini import get_gemini_pool, GeminiAPIPool
-
-# 로깅 설정
-logger = logging.getLogger(__name__)
-
-
-def load_image_as_base64(image_path: Union[str, Path]) -> str:
- """이미지 파일을 Base64로 인코딩"""
- image_path = Path(image_path)
- if not image_path.exists():
- raise FileNotFoundError(f"이미지 파일을 찾을 수 없습니다: {image_path}")
-
- with open(image_path, "rb") as f:
- return base64.b64encode(f.read()).decode("utf-8")
-
-
-def get_image_mime_type(image_path: Union[str, Path]) -> str:
- """이미지 파일의 MIME 타입 반환"""
- image_path = Path(image_path)
- suffix = image_path.suffix.lower()
-
- mime_types = {
- ".jpg": "image/jpeg",
- ".jpeg": "image/jpeg",
- ".png": "image/png",
- ".gif": "image/gif",
- ".webp": "image/webp",
- ".bmp": "image/bmp",
- }
-
- return mime_types.get(suffix, "image/jpeg")
-
-
-class InsuranceTableExtractor:
- """
- 보험 문서 이미지에서 테이블을 추출하는 클래스
-
- polling_gemini의 GeminiAPIPool을 활용하여 자동 API 키 로테이션을 지원합니다.
- """
-
- def __init__(
- self,
- config_path: Optional[str] = None,
- model_name: str = "gemini-2.5-flash",
- ):
- """
- Args:
- config_path: API 키 설정 파일 경로 (None이면 기본 경로 사용)
- model_name: 사용할 Gemini 모델 이름
- """
- self.pool = get_gemini_pool(config_path)
- self.model_name = model_name
- self.system_prompt = SYSTEM_PROMPT
-
- def _create_multimodal_content(
- self,
- image_path: Union[str, Path],
- ocr_markdown: str = "N/A"
- ) -> list:
- """
- 멀티모달 콘텐츠 생성 (이미지 + 텍스트)
-
- Args:
- image_path: 이미지 파일 경로
- ocr_markdown: 참조용 OCR 마크다운 텍스트
-
- Returns:
- Gemini API에 전달할 콘텐츠 리스트
- """
- # 이미지 로드
- image_data = load_image_as_base64(image_path)
- mime_type = get_image_mime_type(image_path)
-
- # 사용자 프롬프트 생성
- user_prompt = get_user_prompt(ocr_markdown)
-
- # 멀티모달 콘텐츠 구성
- content = [
- # 시스템 프롬프트를 포함한 전체 프롬프트
- f"{self.system_prompt}\n\n{user_prompt}",
- # 이미지 데이터
- {
- "mime_type": mime_type,
- "data": image_data
- }
- ]
-
- return content
-
- def extract(
- self,
- image_path: Union[str, Path],
- ocr_markdown: str = "N/A",
- **kwargs
- ) -> str:
- """
- 이미지에서 테이블을 추출하여 Markdown으로 반환 (동기)
-
- Args:
- image_path: 테이블이 포함된 이미지 파일 경로
- ocr_markdown: 참조용 OCR 마크다운 텍스트 (선택적)
- **kwargs: 추가 생성 파라미터
-
- Returns:
- 추출된 Markdown 테이블 문자열
- """
- content = self._create_multimodal_content(image_path, ocr_markdown)
-
- # API Pool의 현재 설정된 모델 사용
- # 멀티모달 요청을 위해 직접 genai 호출
- max_retries = self.pool.settings.get('max_retries', 3)
- retry_delay = self.pool.settings.get('retry_delay', 2)
-
- last_error = None
- attempts = 0
- max_attempts = len(self.pool.api_keys) * max_retries
-
- while attempts < max_attempts:
- current_key = self.pool.api_keys[self.pool.current_key_index]
-
- try:
- # 현재 키로 Gemini 설정
- genai.configure(api_key=current_key.key)
- model = genai.GenerativeModel(self.model_name)
-
- # 생성 설정
- generation_config = {
- 'temperature': self.pool.settings.get('temperature', 0.1),
- }
- generation_config.update(kwargs.get('generation_config', {}))
-
- # API 호출
- response = model.generate_content(
- content,
- generation_config=generation_config
- )
-
- # 성공 시 실패 카운트 리셋
- current_key.failed_count = 0
- current_key.last_error = None
-
- return response.text
-
- except google_exceptions.ResourceExhausted as e:
- logger.warning(f"API 키 '{current_key.name}' 할당량 초과. 다음 키로 전환합니다.")
- current_key.failed_count += 1
- current_key.last_error = str(e)
- last_error = e
-
- if not self.pool._rotate_key():
- break
-
- except Exception as e:
- logger.warning(f"API 호출 실패 (키: {current_key.name}): {e}")
- current_key.failed_count += 1
- current_key.last_error = str(e)
- last_error = e
-
- if self.pool._is_quota_error(e):
- if not self.pool._rotate_key():
- break
- else:
- import time
- time.sleep(retry_delay)
-
- attempts += 1
-
- error_msg = f"모든 API 키로 시도했으나 실패했습니다. 마지막 에러: {last_error}"
- logger.error(error_msg)
- raise Exception(error_msg)
-
- async def aextract(
- self,
- image_path: Union[str, Path],
- ocr_markdown: str = "N/A",
- **kwargs
- ) -> str:
- """
- 이미지에서 테이블을 추출하여 Markdown으로 반환 (비동기)
-
- Args:
- image_path: 테이블이 포함된 이미지 파일 경로
- ocr_markdown: 참조용 OCR 마크다운 텍스트 (선택적)
- **kwargs: 추가 생성 파라미터
-
- Returns:
- 추출된 Markdown 테이블 문자열
- """
- # 동기 메서드를 비동기로 래핑
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(
- None,
- lambda: self.extract(image_path, ocr_markdown, **kwargs)
- )
-
- def get_pool_status(self) -> Dict[str, Any]:
- """현재 API Pool 상태 반환"""
- return {
- 'current_key': self.pool.get_current_key_info(),
- 'all_keys': self.pool.get_all_keys_status()
- }
-
-
-# 편의 함수들
-
-def extract_table_from_image(
- image_path: Union[str, Path],
- ocr_markdown: str = "N/A",
- config_path: Optional[str] = None,
- **kwargs
-) -> str:
- """
- 이미지에서 보험 테이블을 추출하는 간편 함수 (동기)
-
- Args:
- image_path: 테이블이 포함된 이미지 파일 경로
- ocr_markdown: 참조용 OCR 마크다운 텍스트 (선택적)
- config_path: API 키 설정 파일 경로
- **kwargs: 추가 생성 파라미터
-
- Returns:
- 추출된 Markdown 테이블 문자열
-
- Example:
- ```python
- from Table_example import extract_table_from_image
-
- # 기본 사용
- result = extract_table_from_image("insurance_table.png")
- print(result)
-
- # OCR 참조 텍스트와 함께 사용
- result = extract_table_from_image(
- "insurance_table.png",
- ocr_markdown="| 구분 | 금액 |\\n| 보험료 | 10000 |"
- )
- ```
- """
- extractor = InsuranceTableExtractor(config_path=config_path)
- return extractor.extract(image_path, ocr_markdown, **kwargs)
-
-
-async def aextract_table_from_image(
- image_path: Union[str, Path],
- ocr_markdown: str = "N/A",
- config_path: Optional[str] = None,
- **kwargs
-) -> str:
- """
- 이미지에서 보험 테이블을 추출하는 간편 함수 (비동기)
-
- Args:
- image_path: 테이블이 포함된 이미지 파일 경로
- ocr_markdown: 참조용 OCR 마크다운 텍스트 (선택적)
- config_path: API 키 설정 파일 경로
- **kwargs: 추가 생성 파라미터
-
- Returns:
- 추출된 Markdown 테이블 문자열
-
- Example:
- ```python
- import asyncio
- from Table_example import aextract_table_from_image
-
- async def main():
- result = await aextract_table_from_image("insurance_table.png")
- print(result)
-
- asyncio.run(main())
- ```
- """
- extractor = InsuranceTableExtractor(config_path=config_path)
- return await extractor.aextract(image_path, ocr_markdown, **kwargs)
diff --git a/tests/choi/Table_example/test_extraction.ipynb b/tests/choi/Table_example/test_extraction.ipynb
deleted file mode 100644
index fc7a143..0000000
--- a/tests/choi/Table_example/test_extraction.ipynb
+++ /dev/null
@@ -1,610 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "03a4228b",
- "metadata": {},
- "source": [
- "## 1. 환경 설정"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "ac52abd7",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "프로젝트 루트: /Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier\n"
- ]
- }
- ],
- "source": [
- "import sys\n",
- "from pathlib import Path\n",
- "\n",
- "# 프로젝트 루트를 Python 경로에 추가\n",
- "project_root = Path.cwd().parent\n",
- "if str(project_root) not in sys.path:\n",
- " sys.path.insert(0, str(project_root))\n",
- "\n",
- "print(f\"프로젝트 루트: {project_root}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "3f5862d6",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ Table_example 패키지 로드 완료!\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
- "source": [
- "# Table_example 패키지 임포트\n",
- "from Table_example import (\n",
- " InsuranceTableExtractor,\n",
- " extract_table_from_image,\n",
- " aextract_table_from_image,\n",
- " SYSTEM_PROMPT,\n",
- " USER_PROMPT_TEMPLATE,\n",
- ")\n",
- "from Table_example.prompts import get_user_prompt\n",
- "\n",
- "print(\"✅ Table_example 패키지 로드 완료!\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7e2f995a",
- "metadata": {},
- "source": [
- "## 2. 프롬프트 확인\n",
- "\n",
- "보험 도메인 특화 프롬프트를 확인합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "95f59ce1",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "📋 System Prompt:\n",
- "======================================================================\n",
- "# Role Definition\n",
- "당신은 20년 경력의 '수석 보험 데이터 엔지니어'이자 'OCR 후처리 전문가'입니다.\n",
- "당신의 임무는 제공된 [보험 문서 이미지]와 선택적으로 제공되는 [기초 OCR 텍스트]를 분석하여, 데이터베이스 적재가 가능한 완벽한 형태의 'Standardized Markdown Table'로 변환하는 것입니다.\n",
- "\n",
- "# Core Principles\n",
- "1. **구조적 완전성(Structural Integrity):** 시각적으로 병합(Merge)된 셀은 반드시 비정규화(Denormalization)하여 모든 행에 값을 채워야 합니다. 빈칸이나 \" 상동\" 등의 표현은 금지됩니다.\n",
- "2. **헤더 평탄화(Header Flattening):** 2행 이상의 계층적 헤더(Multi-row Headers)는 상위 헤더와 하위 헤더를 언더스코어(_)로 연결하여 단일 행(Single-row) 헤더로 변환합니다. (예: '보장내용' 하위에 '지급금액'이 있다면 -> '보장내용_지급금액')\n",
- "3. **데이터 무결성(Data Integrity):**\n",
- " - 금액의 천 단위 구분자(,)는 제거합니다. (예: 1,000,000 -> 1000000)\n",
- " - 퍼센트(%)는 기호를 포함한 문자열로 유지합니다. (예: 98.5%)\n",
- " - 괄호 안의 보조 정보(예: 전년 대비 증감액)는 무시하고 핵심 수치만 추출합니다.\n",
- "4. **Hybrid Reference:** [기초 OCR 텍스트]가 제공될 경우, 이미지 내 텍스트가 흐릿하거나 불분명할 때 해당 텍스트를 참조하여 오타를 교정하십시오. 단, 표의 구조(행/열 위치) 판단은 반드시 [원본 이미지]를 기준으로 합니다.\n",
- "======================================================================\n"
- ]
- }
- ],
- "source": [
- "# System Prompt 확인\n",
- "print(\"📋 System Prompt:\")\n",
- "print(\"=\" * 70)\n",
- "print(SYSTEM_PROMPT)\n",
- "print(\"=\" * 70)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "3bd4aaa6",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "📋 User Prompt Template:\n",
- "======================================================================\n",
- "# Task Description\n",
- "아래 제공된 입력을 바탕으로 보험 테이블 데이터를 추출하십시오.\n",
- "\n",
- "# Input Data\n",
- "1. **Target Image:** [첨부된 이미지]\n",
- "2. **Reference OCR Markdown (Optional):**\n",
- "\"\"\"\n",
- "{ocr_markdown}\n",
- "\"\"\"\n",
- "\n",
- "# Step-by-Step Instructions (Chain-of-Table)\n",
- "단계별로 생각하고 실행하십시오:\n",
- "\n",
- "**Step 1. 구조 분석 (Structure Analysis)**\n",
- "- 이미지를 보고 표의 전체적인 레이아웃을 파악하십시오.\n",
- "- 헤더가 몇 개의 행(Row)으로 구성되어 있는지 확인하십시오.\n",
- "- 세로로 병합된(Vertically Merged) '구분'이나 '기간' 컬럼이 있는지 식별하십시오.\n",
- "\n",
- "**Step 2. 헤더 처리 (Header Processing)**\n",
- "- 다중 행 헤더를 단일 행 키(Unique Key)로 변환하십시오.\n",
- "- 예:\n",
- " | 구분 | 해지환급금 |\n",
- " | | 금액 | 환급률 |\n",
- " -> | 구분 | 해지환급금_금액 | 해지환급금_환급률 |\n",
- "\n",
- "**Step 3. 데이터 추출 및 채우기 (Extraction & Filling)**\n",
- "- 각 행(Row)의 데이터를 추출하십시오.\n",
- "- **중요:** 병합된 셀은 해당 범위에 속하는 모든 행에 동일한 값을 반복 입력(Repeat Value)하십시오. 절대 빈 칸으로 두지 마십시오.\n",
- "- OCR 참고용 텍스트가 있다면, 숫자의 정확성을 검증하는 데 사용하십시오.\n",
- "\n",
- "**Step 4. 포맷팅 및 검증 (Formatting & Verification)**\n",
- "- 금액에서 '원', ',' 제거 / 정수형 변환.\n",
- "- 출력 전, 헤더의 컬럼 개수와 데이터 행의 컬럼 개수가 일치하는지 확인하십시오.\n",
- "\n",
- "# Output Format\n",
- "설명이나 사족 없이 오직 **Markdown Table** 만 출력하십시오.\n",
- "\n",
- "| 헤더1 | 헤더2_서브1 | 헤더2_서브2 | ... |\n",
- "| :--- | :--- | :--- | ... |\n",
- "| 값1 | 값2 | 값3 | ... |\n",
- "======================================================================\n"
- ]
- }
- ],
- "source": [
- "# User Prompt Template 확인\n",
- "print(\"📋 User Prompt Template:\")\n",
- "print(\"=\" * 70)\n",
- "print(USER_PROMPT_TEMPLATE)\n",
- "print(\"=\" * 70)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "efe2d4d4",
- "metadata": {},
- "source": [
- "## 3. 추출기 초기화\n",
- "\n",
- "API 키 설정 확인 및 추출기를 초기화합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "4d6f9bcb",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ API 키 설정 파일 확인됨: /Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier/apis/gemini_keys.yaml\n"
- ]
- }
- ],
- "source": [
- "# API 키 설정 파일 확인\n",
- "config_path = project_root / \"apis\" / \"gemini_keys.yaml\"\n",
- "\n",
- "if not config_path.exists():\n",
- " print(f\"❌ API 키 설정 파일이 없습니다: {config_path}\")\n",
- " print(\"\\n다음 단계를 수행하세요:\")\n",
- " print(\"1. apis/gemini_keys-example.yaml을 apis/gemini_keys.yaml로 복사\")\n",
- " print(\"2. 실제 Gemini API 키를 입력\")\n",
- " print(\"3. Google AI Studio에서 무료 API 키 발급:\")\n",
- " print(\" https://makersuite.google.com/app/apikey\")\n",
- "else:\n",
- " print(f\"✅ API 키 설정 파일 확인됨: {config_path}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "08823fe6",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-12-02 14:27:15,381 - polling_gemini.api_pool - INFO - 총 3개의 API 키를 로드했습니다.\n",
- "2025-12-02 14:27:15,382 - polling_gemini.api_pool - INFO - API 키 'key1' 사용 중 (모델: gemini-2.5-flash)\n",
- "2025-12-02 14:27:15,382 - polling_gemini.api_pool - INFO - API 키 'key1' 사용 중 (모델: gemini-2.5-flash)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ InsuranceTableExtractor 초기화 성공!\n",
- "\n",
- "현재 사용 중인 API 키: key1\n",
- "총 API 키 수: 3\n",
- "\n",
- "전체 API 키 상태:\n",
- " - key1: 활성화=True, 실패횟수=0\n",
- " - key2: 활성화=True, 실패횟수=0\n",
- " - key3: 활성화=True, 실패횟수=0\n"
- ]
- }
- ],
- "source": [
- "# InsuranceTableExtractor 초기화\n",
- "try:\n",
- " extractor = InsuranceTableExtractor()\n",
- " \n",
- " # API Pool 상태 확인\n",
- " status = extractor.get_pool_status()\n",
- " \n",
- " print(\"✅ InsuranceTableExtractor 초기화 성공!\")\n",
- " print(f\"\\n현재 사용 중인 API 키: {status['current_key']['name']}\")\n",
- " print(f\"총 API 키 수: {status['current_key']['total_keys']}\")\n",
- " \n",
- " print(\"\\n전체 API 키 상태:\")\n",
- " for key in status['all_keys']:\n",
- " print(f\" - {key['name']}: 활성화={key['enabled']}, 실패횟수={key['failed_count']}\")\n",
- " \n",
- "except FileNotFoundError as e:\n",
- " print(f\"❌ API 키 파일 오류: {e}\")\n",
- "except Exception as e:\n",
- " print(f\"❌ 초기화 실패: {e}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "41a30785",
- "metadata": {},
- "source": [
- "## 4. 샘플 이미지 확인\n",
- "\n",
- "테스트할 이미지 파일을 확인합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "dd007a21",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "📁 샘플 이미지 디렉토리: /Users/jaehyeokchoi/Desktop/chois_toy/private/TableMagnifier/Table_example/sample_images\n",
- "📷 발견된 이미지: 1개\n",
- " 1. I_table_78.png\n"
- ]
- }
- ],
- "source": [
- "# 샘플 이미지 디렉토리 확인\n",
- "sample_images_dir = Path.cwd() / \"sample_images\"\n",
- "\n",
- "if not sample_images_dir.exists():\n",
- " sample_images_dir.mkdir(parents=True, exist_ok=True)\n",
- " print(f\"📁 샘플 이미지 디렉토리 생성됨: {sample_images_dir}\")\n",
- "\n",
- "# 이미지 파일 검색\n",
- "image_extensions = [\"*.png\", \"*.jpg\", \"*.jpeg\", \"*.gif\", \"*.webp\", \"*.bmp\"]\n",
- "image_files = []\n",
- "for ext in image_extensions:\n",
- " image_files.extend(sample_images_dir.glob(ext))\n",
- "\n",
- "print(f\"\\n📁 샘플 이미지 디렉토리: {sample_images_dir}\")\n",
- "print(f\"📷 발견된 이미지: {len(image_files)}개\")\n",
- "\n",
- "if image_files:\n",
- " for i, img in enumerate(image_files, 1):\n",
- " print(f\" {i}. {img.name}\")\n",
- "else:\n",
- " print(\"\\n⚠️ 테스트할 이미지를 sample_images 폴더에 추가하세요!\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "7f0f1733",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "📷 첫 번째 이미지 미리보기:\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "image/png": {
- "width": 600
- }
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "# 이미지 미리보기 (IPython 사용)\n",
- "if image_files:\n",
- " from IPython.display import Image, display\n",
- " \n",
- " print(\"📷 첫 번째 이미지 미리보기:\")\n",
- " display(Image(filename=str(image_files[0]), width=600))"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "43bd0f11",
- "metadata": {},
- "source": [
- "## 5. 기본 테이블 추출\n",
- "\n",
- "이미지에서 테이블을 추출합니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "101765ab",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🎯 추출 대상 이미지: I_table_78.png\n"
- ]
- }
- ],
- "source": [
- "# 추출할 이미지 선택\n",
- "if image_files:\n",
- " # 첫 번째 이미지 사용\n",
- " target_image = image_files[0]\n",
- " print(f\"🎯 추출 대상 이미지: {target_image.name}\")\n",
- "else:\n",
- " print(\"❌ 이미지가 없습니다. sample_images 폴더에 이미지를 추가하세요.\")\n",
- " target_image = None"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "8b4d4f8a",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🚀 테이블 추출 시작: I_table_78.png\n",
- "======================================================================\n",
- "\n",
- "✅ 추출 완료!\n",
- "\n",
- "📊 결과 (Markdown Table):\n",
- "----------------------------------------------------------------------\n",
- "| 구분 | XX세 | XX+1세 | XX+2세 | XX+3세 | XX+4세 | XX+5세 |\n",
- "| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n",
- "| 나이증가분(A) | | 1059 | 1357 | 1739 | 2229 | 2855 |\n",
- "| 보험료 산출 기초율 (위험률 등) 증가분 (B=전년도 기준보험료의 최대 25% 가정) | | 10846 | 13897 | 17806 | 22815 | 29232 |\n",
- "| 기준보험료 (C=전년도 기준보험료+A+B) | 42325 | 54321 | 69485 | 89030 | 114074 | 146161 |\n",
- "----------------------------------------------------------------------\n",
- "CPU times: user 21.5 ms, sys: 14.8 ms, total: 36.3 ms\n",
- "Wall time: 14.7 s\n",
- "\n",
- "✅ 추출 완료!\n",
- "\n",
- "📊 결과 (Markdown Table):\n",
- "----------------------------------------------------------------------\n",
- "| 구분 | XX세 | XX+1세 | XX+2세 | XX+3세 | XX+4세 | XX+5세 |\n",
- "| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n",
- "| 나이증가분(A) | | 1059 | 1357 | 1739 | 2229 | 2855 |\n",
- "| 보험료 산출 기초율 (위험률 등) 증가분 (B=전년도 기준보험료의 최대 25% 가정) | | 10846 | 13897 | 17806 | 22815 | 29232 |\n",
- "| 기준보험료 (C=전년도 기준보험료+A+B) | 42325 | 54321 | 69485 | 89030 | 114074 | 146161 |\n",
- "----------------------------------------------------------------------\n",
- "CPU times: user 21.5 ms, sys: 14.8 ms, total: 36.3 ms\n",
- "Wall time: 14.7 s\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "# 테이블 추출 실행\n",
- "if target_image:\n",
- " print(f\"🚀 테이블 추출 시작: {target_image.name}\")\n",
- " print(\"=\" * 70)\n",
- " \n",
- " try:\n",
- " result = extract_table_from_image(target_image)\n",
- " \n",
- " print(\"\\n✅ 추출 완료!\")\n",
- " print(\"\\n📊 결과 (Markdown Table):\")\n",
- " print(\"-\" * 70)\n",
- " print(result)\n",
- " print(\"-\" * 70)\n",
- " \n",
- " except Exception as e:\n",
- " print(f\"❌ 추출 실패: {e}\")\n",
- " import traceback\n",
- " traceback.print_exc()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "bc284731",
- "metadata": {},
- "source": [
- "## 6. OCR 참조 텍스트와 함께 추출\n",
- "\n",
- "OCR로 먼저 추출한 텍스트를 참조로 제공하여 정확도를 높입니다."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "361949ff",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ OCR 참조 파일 로드됨: I_table_78.md\n",
- "\n",
- "📝 OCR 참조 텍스트:\n",
- "----------------------------------------------------------------------\n",
- "|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|\n",
- "|---|---|---|---|---|---|---|\n",
- "|나이증가분(A)||1,059|1,357|1,739|2,229|2,855|\n",
- "|보험료 산출 기초율
(위험률 등) 증가분
(B=전년도
기준보험료의 최대
25% 가정)||10,846|13,897|17,806|22,815|29,232|\n",
- "|기준보험료
(C=전년도
기준보험료+A+B)|42,325|54,321|69,485|89,030|114,074|146,161|\n",
- "----------------------------------------------------------------------\n"
- ]
- }
- ],
- "source": [
- "# OCR 참조 텍스트 로드 (이미지와 같은 이름의 .md 파일에서)\n",
- "# 예: I_table_78.png → I_table_78.md\n",
- "\n",
- "if target_image:\n",
- " # 이미지와 같은 이름의 .md 파일 찾기\n",
- " ocr_md_path = target_image.with_suffix(\".md\")\n",
- " \n",
- " if ocr_md_path.exists():\n",
- " with open(ocr_md_path, \"r\", encoding=\"utf-8\") as f:\n",
- " sample_ocr_markdown = f.read().strip()\n",
- " print(f\"✅ OCR 참조 파일 로드됨: {ocr_md_path.name}\")\n",
- " print(\"\\n📝 OCR 참조 텍스트:\")\n",
- " print(\"-\" * 70)\n",
- " print(sample_ocr_markdown)\n",
- " print(\"-\" * 70)\n",
- " else:\n",
- " sample_ocr_markdown = None\n",
- " print(f\"⚠️ OCR 참조 파일이 없습니다: {ocr_md_path.name}\")\n",
- " print(\" 이미지와 같은 이름의 .md 파일을 sample_images 폴더에 추가하세요.\")\n",
- "else:\n",
- " sample_ocr_markdown = None\n",
- " print(\"❌ 대상 이미지가 없습니다.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "8495b93f",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "🚀 OCR 참조와 함께 테이블 추출: I_table_78.png\n",
- "======================================================================\n",
- "\n",
- "✅ 추출 완료!\n",
- "\n",
- "📊 결과 (OCR 참조 사용):\n",
- "----------------------------------------------------------------------\n",
- "|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|\n",
- "|:---|:---|:---|:---|:---|:---|:---|:---|\n",
- "|나이증가분(A)||1059|1357|1739|2229|2855|\n",
- "|보험료 산출 기초율 (위험률 등) 증가분 (B=전년도 기준보험료의 최대 25% 가정)||10846|13897|17806|22815|29232|\n",
- "|기준보험료 (C=전년도 기준보험료+A+B)|42325|54321|69485|89030|114074|146161|\n",
- "----------------------------------------------------------------------\n",
- "CPU times: user 17.5 ms, sys: 17.2 ms, total: 34.7 ms\n",
- "Wall time: 20.8 s\n",
- "\n",
- "✅ 추출 완료!\n",
- "\n",
- "📊 결과 (OCR 참조 사용):\n",
- "----------------------------------------------------------------------\n",
- "|구분|XX세|XX+1세|XX+2세|XX+3세|XX+4세|XX+5세|\n",
- "|:---|:---|:---|:---|:---|:---|:---|:---|\n",
- "|나이증가분(A)||1059|1357|1739|2229|2855|\n",
- "|보험료 산출 기초율 (위험률 등) 증가분 (B=전년도 기준보험료의 최대 25% 가정)||10846|13897|17806|22815|29232|\n",
- "|기준보험료 (C=전년도 기준보험료+A+B)|42325|54321|69485|89030|114074|146161|\n",
- "----------------------------------------------------------------------\n",
- "CPU times: user 17.5 ms, sys: 17.2 ms, total: 34.7 ms\n",
- "Wall time: 20.8 s\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "# OCR 참조와 함께 테이블 추출\n",
- "if target_image and sample_ocr_markdown:\n",
- " print(f\"🚀 OCR 참조와 함께 테이블 추출: {target_image.name}\")\n",
- " print(\"=\" * 70)\n",
- " \n",
- " try:\n",
- " result_with_ocr = extract_table_from_image(\n",
- " target_image,\n",
- " ocr_markdown=sample_ocr_markdown\n",
- " )\n",
- " \n",
- " print(\"\\n✅ 추출 완료!\")\n",
- " print(\"\\n📊 결과 (OCR 참조 사용):\")\n",
- " print(\"-\" * 70)\n",
- " print(result_with_ocr)\n",
- " print(\"-\" * 70)\n",
- " \n",
- " except Exception as e:\n",
- " print(f\"❌ 추출 실패: {e}\")\n",
- "elif target_image and not sample_ocr_markdown:\n",
- " print(\"⚠️ OCR 참조 파일이 없어서 이 섹션을 건너뜁니다.\")\n",
- " print(f\" {target_image.stem}.md 파일을 sample_images 폴더에 추가하세요.\")\n",
- "else:\n",
- " print(\"❌ 이미지가 없습니다.\")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/tests/choi/Table_example/test_extraction.py b/tests/choi/Table_example/test_extraction.py
deleted file mode 100644
index fb4e153..0000000
--- a/tests/choi/Table_example/test_extraction.py
+++ /dev/null
@@ -1,346 +0,0 @@
-"""
-보험 테이블 추출 테스트 코드
-Table_example 패키지를 사용하여 이미지에서 테이블을 추출하는 예제
-"""
-
-import sys
-import asyncio
-from pathlib import Path
-
-# 프로젝트 루트를 Python 경로에 추가
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from Table_example import (
- InsuranceTableExtractor,
- extract_table_from_image,
- aextract_table_from_image,
- SYSTEM_PROMPT,
- USER_PROMPT_TEMPLATE,
-)
-
-
-def test_prompt_display():
- """1. 프롬프트 내용 확인"""
- print("\n" + "="*70)
- print("테스트 1: 프롬프트 내용 확인")
- print("="*70)
-
- print("\n📋 System Prompt:")
- print("-"*50)
- print(SYSTEM_PROMPT[:500] + "...")
-
- print("\n📋 User Prompt Template (일부):")
- print("-"*50)
- print(USER_PROMPT_TEMPLATE[:500] + "...")
-
- print("\n✅ 프롬프트 로드 성공!")
- return True
-
-
-def test_extractor_initialization():
- """2. 추출기 초기화 테스트"""
- print("\n" + "="*70)
- print("테스트 2: InsuranceTableExtractor 초기화")
- print("="*70)
-
- try:
- extractor = InsuranceTableExtractor()
-
- status = extractor.get_pool_status()
- print(f"\n현재 사용 중인 키: {status['current_key']['name']}")
- print(f"총 API 키 수: {status['current_key']['total_keys']}")
-
- print("\n✅ 추출기 초기화 성공!")
- return True
-
- except FileNotFoundError as e:
- print(f"\n⚠️ API 키 파일이 없습니다: {e}")
- print("apis/gemini_keys.yaml 파일을 생성하세요.")
- return False
- except Exception as e:
- print(f"\n❌ 초기화 실패: {e}")
- return False
-
-
-def test_extract_from_sample_image():
- """3. 샘플 이미지에서 테이블 추출 테스트"""
- print("\n" + "="*70)
- print("테스트 3: 샘플 이미지에서 테이블 추출")
- print("="*70)
-
- # 샘플 이미지 경로 확인
- sample_images_dir = project_root / "Table_example" / "sample_images"
-
- if not sample_images_dir.exists():
- print(f"\n⚠️ 샘플 이미지 디렉토리가 없습니다: {sample_images_dir}")
- print("sample_images 폴더에 테스트 이미지를 추가하세요.")
- return None
-
- # 샘플 이미지 찾기
- image_files = list(sample_images_dir.glob("*.png")) + \
- list(sample_images_dir.glob("*.jpg")) + \
- list(sample_images_dir.glob("*.jpeg"))
-
- if not image_files:
- print(f"\n⚠️ 샘플 이미지가 없습니다: {sample_images_dir}")
- print("PNG, JPG, JPEG 형식의 이미지를 추가하세요.")
- return None
-
- # 첫 번째 이미지로 테스트
- image_path = image_files[0]
- print(f"\n테스트 이미지: {image_path.name}")
-
- try:
- # 테이블 추출
- result = extract_table_from_image(image_path)
-
- print("\n📊 추출 결과:")
- print("-"*50)
- print(result)
- print("-"*50)
-
- print("\n✅ 테이블 추출 성공!")
- return result
-
- except Exception as e:
- print(f"\n❌ 추출 실패: {e}")
- import traceback
- traceback.print_exc()
- return None
-
-
-def test_extract_with_ocr_reference():
- """4. OCR 참조 텍스트와 함께 추출 테스트"""
- print("\n" + "="*70)
- print("테스트 4: OCR 참조 텍스트와 함께 추출")
- print("="*70)
-
- sample_images_dir = project_root / "Table_example" / "sample_images"
-
- if not sample_images_dir.exists():
- print(f"\n⚠️ 샘플 이미지 디렉토리가 없습니다.")
- return None
-
- image_files = list(sample_images_dir.glob("*.png")) + \
- list(sample_images_dir.glob("*.jpg"))
-
- if not image_files:
- print(f"\n⚠️ 샘플 이미지가 없습니다.")
- return None
-
- image_path = image_files[0]
- print(f"\n테스트 이미지: {image_path.name}")
-
- # OCR 참조 텍스트 예시 (실제로는 OCR 결과를 사용)
- sample_ocr_markdown = """| 구분 | 보험기간 | 납입기간 | 가입금액 |
-| 상해사망 | 80세 | 20년 | 1억원 |
-| 질병사망 | 80세 | 20년 | 5천만원 |"""
-
- try:
- result = extract_table_from_image(
- image_path,
- ocr_markdown=sample_ocr_markdown
- )
-
- print("\n📊 추출 결과 (OCR 참조 사용):")
- print("-"*50)
- print(result)
- print("-"*50)
-
- print("\n✅ OCR 참조 추출 성공!")
- return result
-
- except Exception as e:
- print(f"\n❌ 추출 실패: {e}")
- import traceback
- traceback.print_exc()
- return None
-
-
-def test_async_extraction():
- """5. 비동기 테이블 추출 테스트"""
- print("\n" + "="*70)
- print("테스트 5: 비동기 테이블 추출")
- print("="*70)
-
- async def async_test():
- sample_images_dir = project_root / "Table_example" / "sample_images"
-
- if not sample_images_dir.exists():
- print(f"\n⚠️ 샘플 이미지 디렉토리가 없습니다.")
- return None
-
- image_files = list(sample_images_dir.glob("*.png")) + \
- list(sample_images_dir.glob("*.jpg"))
-
- if not image_files:
- print(f"\n⚠️ 샘플 이미지가 없습니다.")
- return None
-
- image_path = image_files[0]
- print(f"\n테스트 이미지: {image_path.name}")
-
- try:
- import time
- start_time = time.time()
-
- result = await aextract_table_from_image(image_path)
-
- elapsed = time.time() - start_time
-
- print(f"\n⏱️ 소요 시간: {elapsed:.2f}초")
- print("\n📊 추출 결과:")
- print("-"*50)
- print(result[:500] + "..." if len(result) > 500 else result)
- print("-"*50)
-
- print("\n✅ 비동기 추출 성공!")
- return result
-
- except Exception as e:
- print(f"\n❌ 추출 실패: {e}")
- import traceback
- traceback.print_exc()
- return None
-
- return asyncio.run(async_test())
-
-
-def test_multiple_images():
- """6. 여러 이미지 동시 처리 테스트"""
- print("\n" + "="*70)
- print("테스트 6: 여러 이미지 동시 처리 (비동기)")
- print("="*70)
-
- async def async_test():
- sample_images_dir = project_root / "Table_example" / "sample_images"
-
- if not sample_images_dir.exists():
- print(f"\n⚠️ 샘플 이미지 디렉토리가 없습니다.")
- return None
-
- image_files = list(sample_images_dir.glob("*.png")) + \
- list(sample_images_dir.glob("*.jpg")) + \
- list(sample_images_dir.glob("*.jpeg"))
-
- if len(image_files) < 2:
- print(f"\n⚠️ 2개 이상의 샘플 이미지가 필요합니다. 현재: {len(image_files)}개")
- return None
-
- # 최대 3개 이미지 처리
- images_to_process = image_files[:3]
-
- print(f"\n처리할 이미지 {len(images_to_process)}개:")
- for img in images_to_process:
- print(f" - {img.name}")
-
- try:
- import time
- start_time = time.time()
-
- # 동시에 여러 이미지 처리
- tasks = [aextract_table_from_image(img) for img in images_to_process]
- results = await asyncio.gather(*tasks, return_exceptions=True)
-
- elapsed = time.time() - start_time
-
- print(f"\n⏱️ 총 소요 시간: {elapsed:.2f}초")
-
- for i, (img, result) in enumerate(zip(images_to_process, results), 1):
- print(f"\n📊 이미지 {i} ({img.name}) 결과:")
- print("-"*40)
- if isinstance(result, Exception):
- print(f"❌ 에러: {result}")
- else:
- print(result[:300] + "..." if len(result) > 300 else result)
-
- success_count = sum(1 for r in results if not isinstance(r, Exception))
- print(f"\n✅ {len(images_to_process)}개 중 {success_count}개 성공!")
- return results
-
- except Exception as e:
- print(f"\n❌ 처리 실패: {e}")
- import traceback
- traceback.print_exc()
- return None
-
- return asyncio.run(async_test())
-
-
-def main():
- """모든 테스트 실행"""
- print("\n" + "🏥 " * 20)
- print("보험 테이블 추출 테스트 시작")
- print("🏥 " * 20)
-
- # API 키 설정 확인
- config_path = project_root / "apis" / "gemini_keys.yaml"
- if not config_path.exists():
- print(f"\n❌ 오류: API 키 설정 파일이 없습니다: {config_path}")
- print("\n다음 단계를 수행하세요:")
- print("1. apis/gemini_keys-example.yaml을 apis/gemini_keys.yaml로 복사")
- print("2. 실제 Gemini API 키를 입력")
- print("3. Google AI Studio에서 무료 API 키 발급:")
- print(" https://makersuite.google.com/app/apikey")
- return
-
- # 샘플 이미지 디렉토리 확인/생성
- sample_images_dir = project_root / "Table_example" / "sample_images"
- if not sample_images_dir.exists():
- sample_images_dir.mkdir(parents=True, exist_ok=True)
- print(f"\n📁 샘플 이미지 디렉토리를 생성했습니다: {sample_images_dir}")
- print("⚠️ 테스트할 보험 테이블 이미지를 이 폴더에 추가하세요.")
-
- # 테스트 실행
- tests = [
- ("프롬프트 확인", test_prompt_display),
- ("추출기 초기화", test_extractor_initialization),
- ("이미지 추출", test_extract_from_sample_image),
- ("OCR 참조 추출", test_extract_with_ocr_reference),
- ("비동기 추출", test_async_extraction),
- ("다중 이미지", test_multiple_images),
- ]
-
- results = []
- for name, test_func in tests:
- try:
- result = test_func()
- # None은 이미지가 없어서 스킵된 경우
- if result is None:
- results.append((name, "skipped"))
- else:
- results.append((name, "success"))
- except KeyboardInterrupt:
- print("\n\n⚠️ 사용자가 테스트를 중단했습니다.")
- break
- except Exception as e:
- print(f"\n❌ 예상치 못한 오류: {e}")
- results.append((name, "failed"))
-
- # 결과 요약
- print("\n" + "=" * 70)
- print("테스트 결과 요약")
- print("=" * 70)
-
- for name, status in results:
- if status == "success":
- icon = "✅"
- elif status == "skipped":
- icon = "⏭️ "
- else:
- icon = "❌"
- print(f"{icon} {name}: {status}")
-
- success_count = sum(1 for _, s in results if s == "success")
- skipped_count = sum(1 for _, s in results if s == "skipped")
- total_count = len(results)
-
- print(f"\n총 {total_count}개 테스트 중 {success_count}개 성공, {skipped_count}개 스킵")
-
- if skipped_count > 0:
- print("\n💡 팁: sample_images 폴더에 보험 테이블 이미지를 추가하면 더 많은 테스트가 실행됩니다.")
-
-
-if __name__ == "__main__":
- main()
diff --git a/tests/choi/upload_fin.txt b/tests/choi/upload_fin.txt
deleted file mode 100644
index da757cb..0000000
--- a/tests/choi/upload_fin.txt
+++ /dev/null
@@ -1 +0,0 @@
-I_origin_0/I_table_1.png
diff --git a/tests/test_flow.py b/tests/test_flow.py
deleted file mode 100644
index 417c125..0000000
--- a/tests/test_flow.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import pytest
-from unittest.mock import MagicMock
-from langchain_core.messages import AIMessage
-from generate_synthetic_table.flow import build_synthetic_table_graph, TableState
-
-@pytest.fixture
-def mock_llm():
- llm = MagicMock()
- llm.invoke.return_value = AIMessage(content="Mock response")
- return llm
-
-def test_graph_compilation(mock_llm):
- graph = build_synthetic_table_graph(mock_llm)
- assert graph is not None
-
-def test_image_to_html_node(mock_llm):
- # This is a bit harder to test without mocking the file system or _load_prompt
- # For now, we just check if the graph builds and runs with a mock
- pass
diff --git a/tests/test_html_to_image.py b/tests/test_html_to_image.py
deleted file mode 100644
index e90c721..0000000
--- a/tests/test_html_to_image.py
+++ /dev/null
@@ -1,41 +0,0 @@
-
-from pathlib import Path
-import pytest
-from generate_synthetic_table.html_to_image import capture_html_as_image
-
-def test_capture_html_as_image(tmp_path):
- html = """
-
-
-
-
-
- Test Table
-
-
- | Name |
- Age |
-
-
- | Alice |
- 30 |
-
-
- | Bob |
- 25 |
-
-
-
-
- """
-
- output_path = tmp_path / "test_table.png"
- capture_html_as_image(html, output_path)
-
- assert output_path.exists()
- assert output_path.stat().st_size > 0
- print(f"Image saved to {output_path}")
diff --git a/tests/test_utils.py b/tests/test_utils.py
deleted file mode 100644
index d78f755..0000000
--- a/tests/test_utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import pytest
-from generate_synthetic_table.validators import robust_json_parse, validate_html
-
-def test_robust_json_parse_valid():
- assert robust_json_parse('{"a": 1}') == {"a": 1}
-
-def test_robust_json_parse_markdown():
- text = """
- Here is the json:
- ```json
- {
- "key": "value"
- }
- ```
- """
- assert robust_json_parse(text) == {"key": "value"}
-
-def test_robust_json_parse_markdown_no_lang():
- text = """
- ```
- {"x": [1, 2]}
- ```
- """
- assert robust_json_parse(text) == {"x": [1, 2]}
-
-def test_robust_json_parse_dirty():
- text = "Sure! {\"a\": 1} is the answer."
- assert robust_json_parse(text) == {"a": 1}
-
-def test_validate_html_valid():
- html = ""
- assert validate_html(html) is True
-
-def test_validate_html_invalid_structure():
- html = "Not a table
"
- assert validate_html(html) is False
-
-def test_validate_html_empty():
- assert validate_html("") is False
diff --git a/tests/test_validation.py b/tests/test_validation.py
deleted file mode 100644
index 3b72a1b..0000000
--- a/tests/test_validation.py
+++ /dev/null
@@ -1,354 +0,0 @@
-"""3단계 검증 체인 테스트 스크립트 (standalone)"""
-
-import re
-import logging
-from typing import Optional, Any, List, Tuple
-from bs4 import BeautifulSoup
-
-# pandas/pandasql import
-try:
- import pandas as pd
- from pandasql import sqldf
- PANDAS_SQL_AVAILABLE = True
-except ImportError:
- PANDAS_SQL_AVAILABLE = False
- pd = None
- sqldf = None
-
-# REPL import
-try:
- from langchain_experimental.tools import PythonREPLTool
- REPL_AVAILABLE = True
-except ImportError:
- REPL_AVAILABLE = False
- PythonREPLTool = None
-
-logger = logging.getLogger(__name__)
-
-
-def parse_html_table_to_json(html: str) -> Optional[dict]:
- """HTML 테이블을 JSON으로 파싱"""
- if not html:
- return None
-
- try:
- soup = BeautifulSoup(html, "html.parser")
- table = soup.find("table")
- if not table:
- return None
-
- rows = table.find_all("tr")
- if not rows:
- return None
-
- max_cols = 0
- for row in rows:
- cols = sum(int(cell.get("colspan", 1)) for cell in row.find_all(["td", "th"]))
- max_cols = max(max_cols, cols)
-
- if max_cols == 0:
- return None
-
- grid = []
- for row_idx, row in enumerate(rows):
- while len(grid) <= row_idx:
- grid.append([None] * max_cols)
-
- col_idx = 0
- for cell in row.find_all(["td", "th"]):
- while col_idx < max_cols and grid[row_idx][col_idx] is not None:
- col_idx += 1
-
- if col_idx >= max_cols:
- break
-
- cell_text = cell.get_text(strip=True)
- colspan = int(cell.get("colspan", 1))
- rowspan = int(cell.get("rowspan", 1))
-
- for r in range(rowspan):
- for c in range(colspan):
- target_row = row_idx + r
- target_col = col_idx + c
- while len(grid) <= target_row:
- grid.append([None] * max_cols)
- if target_col < max_cols:
- grid[target_row][target_col] = cell_text
-
- col_idx += colspan
-
- raw_rows = [[cell if cell is not None else "" for cell in row] for row in grid]
- if not raw_rows:
- return None
-
- has_header = bool(rows[0].find_all("th"))
-
- if has_header and len(raw_rows) > 1:
- headers = raw_rows[0]
- data = raw_rows[1:]
- else:
- headers = [f"col_{i+1}" for i in range(len(raw_rows[0]))]
- data = raw_rows
-
- return {"headers": headers, "data": data, "raw_rows": raw_rows}
- except Exception as e:
- logger.error(f"HTML parsing failed: {e}")
- return None
-
-
-def html_table_to_dataframe(html: str) -> Optional[Any]:
- """HTML 테이블을 DataFrame으로 변환"""
- if not PANDAS_SQL_AVAILABLE:
- return None
-
- parsed = parse_html_table_to_json(html)
- if not parsed:
- return None
-
- try:
- headers = parsed["headers"]
- data = parsed["data"]
-
- clean_headers = []
- for i, h in enumerate(headers):
- clean = re.sub(r'[^\w가-힣]', '_', str(h).strip())
- clean = re.sub(r'_+', '_', clean).strip('_')
- if not clean or clean[0].isdigit():
- clean = f"col_{i}"
- clean_headers.append(clean)
-
- seen = {}
- final_headers = []
- for h in clean_headers:
- if h in seen:
- seen[h] += 1
- final_headers.append(f"{h}_{seen[h]}")
- else:
- seen[h] = 0
- final_headers.append(h)
-
- return pd.DataFrame(data, columns=final_headers)
- except Exception as e:
- logger.error(f"DataFrame conversion failed: {e}")
- return None
-
-
-def compare_tables_with_sql(original_html: str, synthetic_html: str) -> Tuple[bool, List[str]]:
- """pandasql로 두 테이블 비교"""
- if not PANDAS_SQL_AVAILABLE:
- return True, ["pandas/pandasql not available"]
-
- issues = []
-
- df_original = html_table_to_dataframe(original_html)
- df_synthetic = html_table_to_dataframe(synthetic_html)
-
- if df_original is None:
- return False, ["원본 테이블 변환 실패"]
- if df_synthetic is None:
- return False, ["합성 테이블 변환 실패"]
-
- orig_shape = df_original.shape
- synth_shape = df_synthetic.shape
-
- if orig_shape[1] != synth_shape[1]:
- issues.append(f"열 수 불일치: 원본={orig_shape[1]}, 합성={synth_shape[1]}")
-
- if orig_shape[0] != synth_shape[0]:
- issues.append(f"행 수 불일치: 원본={orig_shape[0]}, 합성={synth_shape[0]}")
-
- if orig_shape[1] != synth_shape[1]:
- return False, issues
-
- try:
- common_cols = [f"c{i}" for i in range(orig_shape[1])]
- df_original.columns = common_cols
- df_synthetic.columns = common_cols
-
- env = {"df_original": df_original, "df_synthetic": df_synthetic}
-
- numeric_cols = df_original.select_dtypes(include=['number']).columns.tolist()
-
- for col in numeric_cols:
- try:
- sum_query = f"""
- SELECT
- (SELECT COALESCE(SUM(CAST({col} AS REAL)), 0) FROM df_original) as orig_sum,
- (SELECT COALESCE(SUM(CAST({col} AS REAL)), 0) FROM df_synthetic) as synth_sum
- """
- sum_result = sqldf(sum_query, env)
- orig_sum = sum_result.iloc[0]['orig_sum']
- synth_sum = sum_result.iloc[0]['synth_sum']
-
- if abs(orig_sum - synth_sum) > 0.01:
- issues.append(f"열 '{col}' 합계 불일치: 원본={orig_sum:.2f}, 합성={synth_sum:.2f}")
- except:
- pass
-
- except Exception as e:
- issues.append(f"SQL 비교 오류: {e}")
-
- passed = len([i for i in issues if not i.startswith(" -")]) == 0
- return passed, issues
-
-
-def compare_tables_with_repl(original_html: str, synthetic_html: str) -> Tuple[bool, List[str]]:
- """REPL로 두 테이블 비교"""
- if not REPL_AVAILABLE or not PANDAS_SQL_AVAILABLE:
- return True, ["REPL 사용 불가"]
-
- df_original = html_table_to_dataframe(original_html)
- df_synthetic = html_table_to_dataframe(synthetic_html)
-
- if df_original is None or df_synthetic is None:
- return False, ["DataFrame 변환 실패"]
-
- issues = []
-
- try:
- # Shape 비교
- if df_original.shape != df_synthetic.shape:
- issues.append(f"Shape 불일치: 원본={df_original.shape}, 합성={df_synthetic.shape}")
-
- # 행 수 비교
- if len(df_original) != len(df_synthetic):
- issues.append(f"행 수 불일치: 원본={len(df_original)}, 합성={len(df_synthetic)}")
-
- # 숫자 컬럼 합계 비교
- for col in df_original.columns:
- if col in df_synthetic.columns:
- try:
- orig_sum = pd.to_numeric(df_original[col], errors='coerce').sum()
- synth_sum = pd.to_numeric(df_synthetic[col], errors='coerce').sum()
- if pd.notna(orig_sum) and pd.notna(synth_sum):
- if abs(orig_sum - synth_sum) > 0.01:
- issues.append(f"컬럼 '{col}' 합계 불일치: 원본={orig_sum:.2f}, 합성={synth_sum:.2f}")
- except:
- pass
-
- except Exception as e:
- issues.append(f"REPL 비교 오류: {e}")
-
- return len(issues) == 0, issues
-
-print("=" * 60)
-print("3단계 검증 체인 테스트")
-print("=" * 60)
-
-# 패키지 상태 확인
-print(f"\n📦 패키지 상태:")
-print(f" - pandas/pandasql: {'✅ 사용 가능' if PANDAS_SQL_AVAILABLE else '❌ 없음'}")
-print(f" - langchain REPL: {'✅ 사용 가능' if REPL_AVAILABLE else '❌ 없음'}")
-
-# 테스트용 HTML 테이블
-original_html = """
-
-
- | 이름 | 나이 | 점수 |
-
-
- | 김철수 | 25 | 85 |
- | 이영희 | 30 | 92 |
- | 박민수 | 28 | 78 |
-
-
-"""
-
-# 테스트 1: 동일한 테이블 비교 (PASS 예상)
-print("\n" + "=" * 60)
-print("테스트 1: 동일한 테이블 비교 (PASS 예상)")
-print("=" * 60)
-
-identical_synthetic = """
-
- | 이름 | 나이 | 점수 |
- | 김철수 | 25 | 85 |
- | 이영희 | 30 | 92 |
- | 박민수 | 28 | 78 |
-
-"""
-
-print("\n[1/3] pandasql 검증...")
-passed, issues = compare_tables_with_sql(original_html, identical_synthetic)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- print(f" 이슈: {issues}")
-
-print("\n[2/3] REPL 검증...")
-passed, issues = compare_tables_with_repl(original_html, identical_synthetic)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- print(f" 이슈: {issues}")
-
-# 테스트 2: 행 수가 다른 테이블 (FAIL 예상)
-print("\n" + "=" * 60)
-print("테스트 2: 행 수가 다른 테이블 (FAIL 예상)")
-print("=" * 60)
-
-different_rows = """
-
- | 이름 | 나이 | 점수 |
- | 김철수 | 25 | 85 |
- | 이영희 | 30 | 92 |
-
-"""
-
-print("\n[1/3] pandasql 검증...")
-passed, issues = compare_tables_with_sql(original_html, different_rows)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- for issue in issues[:5]:
- print(f" - {issue}")
-
-print("\n[2/3] REPL 검증...")
-passed, issues = compare_tables_with_repl(original_html, different_rows)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- for issue in issues[:5]:
- print(f" - {issue}")
-
-# 테스트 3: 숫자 값이 다른 테이블 (FAIL 예상)
-print("\n" + "=" * 60)
-print("테스트 3: 숫자 값이 다른 테이블 (FAIL 예상)")
-print("=" * 60)
-
-different_values = """
-
- | 이름 | 나이 | 점수 |
- | 김철수 | 25 | 90 |
- | 이영희 | 30 | 95 |
- | 박민수 | 28 | 80 |
-
-"""
-
-print("\n[1/3] pandasql 검증...")
-passed, issues = compare_tables_with_sql(original_html, different_values)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- for issue in issues[:5]:
- print(f" - {issue}")
-
-print("\n[2/3] REPL 검증...")
-passed, issues = compare_tables_with_repl(original_html, different_values)
-print(f" 결과: {'✅ PASS' if passed else '❌ FAIL'}")
-if issues:
- for issue in issues[:5]:
- print(f" - {issue}")
-
-# DataFrame 변환 테스트
-print("\n" + "=" * 60)
-print("DataFrame 변환 테스트")
-print("=" * 60)
-
-df = html_table_to_dataframe(original_html)
-if df is not None:
- print(f"\n변환된 DataFrame:")
- print(df)
- print(f"\nShape: {df.shape}")
- print(f"Columns: {list(df.columns)}")
-else:
- print("❌ DataFrame 변환 실패")
-
-print("\n" + "=" * 60)
-print("테스트 완료!")
-print("=" * 60)
diff --git a/uv.lock b/uv.lock
index 9e69bc9..6daa4bd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 2
+revision = 3
requires-python = ">=3.12"
resolution-markers = [
"python_full_version >= '3.14'",
@@ -2425,6 +2425,25 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" },
]
+[[package]]
+name = "playwright"
+version = "1.57.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "greenlet" },
+ { name = "pyee" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ed/b6/e17543cea8290ae4dced10be21d5a43c360096aa2cce0aa7039e60c50df3/playwright-1.57.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:9351c1ac3dfd9b3820fe7fc4340d96c0d3736bb68097b9b7a69bd45d25e9370c", size = 41985039, upload-time = "2025-12-09T08:06:18.408Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/04/ef95b67e1ff59c080b2effd1a9a96984d6953f667c91dfe9d77c838fc956/playwright-1.57.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4a9d65027bce48eeba842408bcc1421502dfd7e41e28d207e94260fa93ca67e", size = 40775575, upload-time = "2025-12-09T08:06:22.105Z" },
+ { url = "https://files.pythonhosted.org/packages/60/bd/5563850322a663956c927eefcf1457d12917e8f118c214410e815f2147d1/playwright-1.57.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:99104771abc4eafee48f47dac2369e0015516dc1ce8c409807d2dd440828b9a4", size = 41985042, upload-time = "2025-12-09T08:06:25.357Z" },
+ { url = "https://files.pythonhosted.org/packages/56/61/3a803cb5ae0321715bfd5247ea871d25b32c8f372aeb70550a90c5f586df/playwright-1.57.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:284ed5a706b7c389a06caa431b2f0ba9ac4130113c3a779767dda758c2497bb1", size = 45975252, upload-time = "2025-12-09T08:06:29.186Z" },
+ { url = "https://files.pythonhosted.org/packages/83/d7/b72eb59dfbea0013a7f9731878df8c670f5f35318cedb010c8a30292c118/playwright-1.57.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a1bae6c0a07839cdeaddbc0756b3b2b85e476c07945f64ece08f1f956a86f1", size = 45706917, upload-time = "2025-12-09T08:06:32.549Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/09/3fc9ebd7c95ee54ba6a68d5c0bc23e449f7235f4603fc60534a364934c16/playwright-1.57.0-py3-none-win32.whl", hash = "sha256:1dd93b265688da46e91ecb0606d36f777f8eadcf7fbef12f6426b20bf0c9137c", size = 36553860, upload-time = "2025-12-09T08:06:35.864Z" },
+ { url = "https://files.pythonhosted.org/packages/58/d4/dcdfd2a33096aeda6ca0d15584800443dd2be64becca8f315634044b135b/playwright-1.57.0-py3-none-win_amd64.whl", hash = "sha256:6caefb08ed2c6f29d33b8088d05d09376946e49a73be19271c8cd5384b82b14c", size = 36553864, upload-time = "2025-12-09T08:06:38.915Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/60/fe31d7e6b8907789dcb0584f88be741ba388413e4fbce35f1eba4e3073de/playwright-1.57.0-py3-none-win_arm64.whl", hash = "sha256:5f065f5a133dbc15e6e7c71e7bc04f258195755b1c32a432b792e28338c8335e", size = 32837940, upload-time = "2025-12-09T08:06:42.268Z" },
+]
+
[[package]]
name = "prompt-toolkit"
version = "3.0.52"
@@ -2721,6 +2740,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" },
]
+[[package]]
+name = "pyee"
+version = "13.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" },
+]
+
[[package]]
name = "pygments"
version = "2.19.2"
@@ -3374,6 +3405,7 @@ dependencies = [
{ name = "notion-client" },
{ name = "pandas" },
{ name = "pandasql" },
+ { name = "playwright" },
{ name = "pydantic" },
{ name = "pymongo" },
{ name = "pymupdf" },
@@ -3405,6 +3437,7 @@ requires-dist = [
{ name = "notion-client", specifier = ">=2.0.0" },
{ name = "pandas", specifier = ">=2.3.3" },
{ name = "pandasql", specifier = ">=0.7.3" },
+ { name = "playwright", specifier = ">=1.57.0" },
{ name = "pydantic", specifier = ">=2.12.4" },
{ name = "pymongo", specifier = ">=4.6.1" },
{ name = "pymupdf", specifier = ">=1.26.7" },