
Commit 94c461f

chore: update publications from Google Scholar
1 parent 8da20d0

File tree: 1 file changed (+2 −2)

publications.bib (2 additions & 2 deletions)
@@ -1,5 +1,5 @@
 % AUTO-GENERATED FILE — DO NOT EDIT
-% Merged static.bib + dynamic.bib on 2026-02-04T18:37:04Z
+% Merged static.bib + dynamic.bib on 2026-02-04T18:41:03Z
 
 @inproceedings{10.1007/978-3-032-04617-8_3,
 abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},
@@ -1705,7 +1705,7 @@ @inproceedings{Park21-icc
 
 @misc{Pyo2025_frieda_benchmarking_multistep_cartographic_reasoning_in,
 abstract = {Cartographic reasoning is the skill of interpreting geographic relationships by aligning legends, map scales, compass directions, map texts, and geometries across one or more map images. Although essential as a concrete cognitive capability and for critical tasks such as disaster response and urban planning, it remains largely unevaluated. Building on progress in chart and infographic understanding, recent large vision language model studies on map visual question-answering often treat maps as a special case of charts. In contrast, map VQA demands comprehension of layered symbology (e.g., symbols, geometries, and text labels) as well as spatial relations tied to orientation and distance that often span multiple maps and are not captured by chart-style evaluations. To address this gap, we introduce FRIEDA, a benchmark for testing complex open-ended cartographic reasoning in LVLMs. FRIEDA sources real map images from documents and reports in various domains and geographical areas. Following classifications in Geographic Information System (GIS) literature, FRIEDA targets all three categories of spatial relations: topological (border, equal, intersect, within), metric (distance), and directional (orientation). All questions require multi-step inference, and many require cross-map grounding and reasoning. We evaluate eleven state-of-the-art LVLMs under two settings: (1) the direct setting, where we provide the maps relevant to the question, and (2) the contextual setting, where the model may have to identify the maps relevant to the question before reasoning. Even the strongest models, Gemini-2.5-Pro and GPT-5-Think, achieve only 38.20% and 37.20% accuracy, respectively, far below human performance of 84.87%. These results reveal a persistent gap in multi-step cartographic reasoning, positioning FRIEDA as a rigorous benchmark to drive progress on spatial intelligence in LVLMs.},
-author = {FRIEDA and Jiyoon Pyo and Yuankun Jiao and Dongwon Jung and Zekun Li and Leeje Jang and Sofia Kirsanova and Jina Kim and Yijun Lin and Qin Liu and Junyi Xie and Hadi Askari and Nan Xu and Muhao Chen and Yao-Yi Chiang},
+author = {Jiyoon Pyo and Yuankun Jiao and Dongwon Jung and Zekun Li and Leeje Jang and Sofia Kirsanova and Jina Kim and Yijun Lin and Qin Liu and Junyi Xie and Hadi Askari and Nan Xu and Muhao Chen and Yao-Yi Chiang},
 howpublished = {arXiv},
 primaryclass = {cs.CV},
 title = {FRIEDA: Benchmarking Multi-Step Cartographic Reasoning in Vision-Language Models},
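
The generated file's header comments describe how publications.bib is produced: static.bib and dynamic.bib (the latter presumably holding the entries pulled from Google Scholar, per the commit message) are concatenated under a UTC-timestamped header, which is why reruns only minutes apart yield the one-line timestamp change in the first hunk. A minimal sketch of such a merge step, in Python, assuming plain concatenation and the file names from the header (the actual generator script is not part of this commit):

# merge_bib.py — hypothetical sketch; only its output appears in this commit.
# Concatenates static.bib and dynamic.bib under the timestamped header that
# the diff shows at the top of publications.bib.
from datetime import datetime, timezone
from pathlib import Path

def merge_bib(static_path: str = "static.bib",
              dynamic_path: str = "dynamic.bib",
              out_path: str = "publications.bib") -> None:
    # UTC timestamp in the same format as the header line in the diff.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    header = ("% AUTO-GENERATED FILE — DO NOT EDIT\n"
              f"% Merged static.bib + dynamic.bib on {stamp}\n\n")
    # Plain concatenation: static entries first, then the dynamic ones.
    body = "\n\n".join(Path(p).read_text(encoding="utf-8").strip()
                       for p in (static_path, dynamic_path))
    Path(out_path).write_text(header + body + "\n", encoding="utf-8")

if __name__ == "__main__":
    merge_bib()

Under this scheme, rerunning the generator rewrites the timestamp line even when no entries change, which matches the first hunk; the second hunk is a content fix in the source data, dropping the stray "FRIEDA" token from the author list.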
