Skip to content

Can't successfully transform the PDF coordinates into image coordinates. #47

@Andy718811

Description

@Andy718811

Here is my code; the result is not very good.

import json
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import fitz  # pip install pymupdf
import json
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

import json
from PIL import Image, ImageDraw
import fitz  # pip install pymupdf



import json
from PIL import Image, ImageDraw

class Chunk:
    """A piece of text together with the four coordinates of its box."""

    def __init__(self, text, pos):
        """Store ``text`` and split ``pos`` into ``x1``, ``x2``, ``y1``, ``y2``.

        Args:
            text: The text content of the chunk.
            pos: An iterable of exactly four values, taken in the order
                ``x1, x2, y1, y2``.
        """
        self.text = text
        first_x, second_x, first_y, second_y = pos
        self.x1 = first_x
        self.x2 = second_x
        self.y1 = first_y
        self.y2 = second_y

def load_chunks(json_path):
    """Read a JSON chunk file and return its entries as ``Chunk`` objects.

    Args:
        json_path: Path to a UTF-8 JSON file whose top-level object has a
            ``"chunks"`` list; each entry provides ``"text"`` and ``"pos"``.

    Returns:
        A list with one ``Chunk`` per entry, in file order.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    loaded = []
    for record in payload["chunks"]:
        loaded.append(Chunk(text=record["text"], pos=record["pos"]))
    return loaded

def get_table_bounds(chunks):
    """Return the overall extent covered by a collection of chunks.

    Each chunk's coordinate pairs are normalised with ``min``/``max`` before
    being aggregated, so a chunk whose ``x1 > x2`` or ``y1 > y2`` still
    contributes its true extent. This matches how
    ``pdf_chunk_to_image_bbox`` treats individual chunks.

    Args:
        chunks: An iterable of objects exposing ``x1``, ``x2``, ``y1`` and
            ``y2``. It is materialised once, so a generator is accepted.

    Returns:
        A tuple ``(x_min, x_max, y_min, y_max)``.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    # Materialise once: the bounds need several passes over the data.
    chunks = list(chunks)
    if not chunks:
        raise ValueError("get_table_bounds() requires at least one chunk")

    x_min = min(min(c.x1, c.x2) for c in chunks)
    x_max = max(max(c.x1, c.x2) for c in chunks)
    y_min = min(min(c.y1, c.y2) for c in chunks)
    y_max = max(max(c.y1, c.y2) for c in chunks)
    return x_min, x_max, y_min, y_max

def pdf_chunk_to_image_bbox(chunk, x_min, x_max, y_min, y_max, img_w, img_h, padding=2):
    """Map one chunk's box from table coordinates to image pixel coordinates.

    The table extent ``[x_min, x_max] x [y_min, y_max]`` is scaled linearly
    onto an area of ``(img_w - padding) x (img_h - padding)`` pixels and the
    vertical axis is flipped, so ``y_max`` lands on pixel row 0.

    Args:
        chunk: Object exposing ``x1``, ``x2``, ``y1`` and ``y2``; each pair
            may be given in either order.
        x_min, x_max, y_min, y_max: Extent of the whole table, in the same
            coordinate system as ``chunk``.
        img_w, img_h: Size of the target image in pixels.
        padding: Total number of pixels removed from each image dimension
            before computing the scale. The default of 2 reproduces the
            previously hard-coded ``img_w - 2`` / ``img_h - 2``. No offset is
            added to the resulting coordinates.

    Returns:
        A tuple ``(left, top, right, bottom)`` with ``left <= right`` and
        ``top <= bottom``. If the table extent along an axis is zero, every
        coordinate on that axis collapses to ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    x_extent = x_max - x_min
    y_extent = y_max - y_min
    # A degenerate (zero-size) extent has no meaningful scale; use 0 so the
    # caller gets a collapsed box rather than a division-by-zero crash.
    scale_x = (img_w - padding) / x_extent if x_extent else 0.0
    scale_y = (img_h - padding) / y_extent if y_extent else 0.0

    x1 = (min(chunk.x1, chunk.x2) - x_min) * scale_x
    x2 = (max(chunk.x1, chunk.x2) - x_min) * scale_x
    # Flip the Y axis: distances are measured down from y_max.
    # NOTE(review): assumes the chunk coordinates grow upward while image
    # rows grow downward -- confirm against the data source.
    y1 = (y_max - min(chunk.y1, chunk.y2)) * scale_y
    y2 = (y_max - max(chunk.y1, chunk.y2)) * scale_y
    return min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)


def visualize_chunks_on_cropped_image(json_path, image_path, box_color="red", box_width=2):
    """Draw every chunk's box on the table image and display the result.

    Args:
        json_path: Path to the chunk file read by ``load_chunks``.
        image_path: Path to the image to draw on; it is opened and converted
            to RGB.
        box_color: Outline colour of the rectangles.
        box_width: Outline width of the rectangles, in pixels.
    """
    # Load the image and the annotations.
    canvas = Image.open(image_path).convert("RGB")
    width, height = canvas.size
    pen = ImageDraw.Draw(canvas)

    chunks = load_chunks(json_path)
    x_min, x_max, y_min, y_max = get_table_bounds(chunks)

    for chunk in chunks:
        left, top, right, bottom = pdf_chunk_to_image_bbox(
            chunk, x_min, x_max, y_min, y_max, width, height
        )
        pen.rectangle(
            [(left, top), (right, bottom)],
            outline=box_color,
            width=box_width,
        )

    canvas.show()


if __name__ == "__main__":
    # Example run on one chunk file / image pair under the local SciTSR
    # train directory.
    json_path = r"D:\下載\SciTSR\SciTSR\train\chunk\0707.0704v1.1.chunk"
    image_path = r"D:\下載\SciTSR\SciTSR\train\img\0707.0704v1.1.png"
    visualize_chunks_on_cropped_image(json_path=json_path, image_path=image_path)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions