|
1 | 1 | % AUTO-GENERATED FILE — DO NOT EDIT |
2 | | -% Merged static.bib + dynamic.bib on 2026-02-03T23:26:30Z |
| 2 | +% Merged static.bib + dynamic.bib on 2026-02-04T18:37:04Z |
3 | 3 |
|
4 | 4 | @inproceedings{10.1007/978-3-032-04617-8_3, |
5 | 5 | abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.}, |
@@ -1705,24 +1705,14 @@ @inproceedings{Park21-icc |
1705 | 1705 |
|
1706 | 1706 | @misc{Pyo2025_frieda_benchmarking_multistep_cartographic_reasoning_in, |
1707 | 1707 | abstract = {Cartographic reasoning is the skill of interpreting geographic relationships by aligning legends, map scales, compass directions, map texts, and geometries across one or more map images. Although essential as a concrete cognitive capability and for critical tasks such as disaster response and urban planning, it remains largely unevaluated. Building on progress in chart and infographic understanding, recent large vision language model studies on map visual question-answering often treat maps as a special case of charts. In contrast, map VQA demands comprehension of layered symbology (e.g., symbols, geometries, and text labels) as well as spatial relations tied to orientation and distance that often span multiple maps and are not captured by chart-style evaluations. To address this gap, we introduce FRIEDA, a benchmark for testing complex open-ended cartographic reasoning in LVLMs. FRIEDA sources real map images from documents and reports in various domains and geographical areas. Following classifications in Geographic Information System (GIS) literature, FRIEDA targets all three categories of spatial relations: topological (border, equal, intersect, within), metric (distance), and directional (orientation). All questions require multi-step inference, and many require cross-map grounding and reasoning. We evaluate eleven state-of-the-art LVLMs under two settings: (1) the direct setting, where we provide the maps relevant to the question, and (2) the contextual setting, where the model may have to identify the maps relevant to the question before reasoning. Even the strongest models, Gemini-2.5-Pro and GPT-5-Think, achieve only 38.20% and 37.20% accuracy, respectively, far below human performance of 84.87%. These results reveal a persistent gap in multi-step cartographic reasoning, positioning FRIEDA as a rigorous benchmark to drive progress on spatial intelligence in LVLMs.}, |
1708 | | - author = {Jiyoon Pyo and Yuankun Jiao and Dongwon Jung and Zekun Li and Leeje Jang and Sofia Kirsanova and Jina Kim and Yijun Lin and Qin Liu and Junyi Xie and Hadi Askari and Nan Xu and Muhao Chen and Yao-Yi Chiang}, |
| 1708 | + author = {FRIEDA and Jiyoon Pyo and Yuankun Jiao and Dongwon Jung and Zekun Li and Leeje Jang and Sofia Kirsanova and Jina Kim and Yijun Lin and Qin Liu and Junyi Xie and Hadi Askari and Nan Xu and Muhao Chen and Yao-Yi Chiang}, |
1709 | 1709 | howpublished = {arXiv}, |
1710 | 1710 | primaryclass = {cs.CV}, |
1711 | 1711 | title = {FRIEDA: Benchmarking Multi-Step Cartographic Reasoning in Vision-Language Models}, |
1712 | 1712 | url = {https://arxiv.org/api/cAn1zHHcKlclQ3NWAR92PR4QKLc}, |
1713 | 1713 | year = {2025} |
1714 | 1714 | } |
1715 | 1715 |
|
1716 | | -@misc{Qasemi2025_mapqa_opendomain_geospatial_question_answering_on, |
1717 | | - abstract = {Geospatial question answering (QA) is a fundamental task in navigation and point of interest (POI) searches. While existing geospatial QA datasets exist, they are limited in both scale and diversity, often relying solely on textual descriptions of geo-entities without considering their geometries. A major challenge in scaling geospatial QA datasets for reasoning lies in the complexity of geospatial relationships, which require integrating spatial structures, topological dependencies, and multi-hop reasoning capabilities that most text-based QA datasets lack. To address these limitations, we introduce MapQA, a novel dataset that not only provides question-answer pairs but also includes the geometries of geo-entities referenced in the questions. MapQA is constructed using SQL query templates to extract question-answer pairs from OpenStreetMap (OSM) for two study regions: Southern California and Illinois. It consists of 3,154 QA pairs spanning nine question types that require geospatial reasoning, such as neighborhood inference and geo-entity type identification. Compared to existing datasets, MapQA expands both the number and diversity of geospatial question types. We explore two approaches to tackle this challenge: (1) a retrieval-based language model that ranks candidate geo-entities by embedding similarity, and (2) a large language model (LLM) that generates SQL queries from natural language questions and geo-entity attributes, which are then executed against an OSM database. Our findings indicate that retrieval-based methods effectively capture concepts like closeness and direction but struggle with questions that require explicit computations (e.g., distance calculations). LLMs (e.g., GPT and Gemini) excel at generating SQL queries for one-hop reasoning but face challenges with multi-hop reasoning, highlighting a key bottleneck in advancing geospatial QA systems.}, |
1718 | | - author = {Zekun Li and Malcolm Grossman and Ehsan Qasemi and Mihir Kulkarni and Muhao Chen and Yao-Yi Chiang},
1719 | | - howpublished = {arXiv}, |
1720 | | - primaryclass = {cs.CL}, |
1721 | | - title = {MapQA: Open-domain Geospatial Question Answering on Map Data}, |
1722 | | - url = {https://arxiv.org/api/iYRpIcVk26QJA3RoS979LDUIOwY}, |
1723 | | - year = {2025} |
1724 | | -} |
1725 | | - |
1726 | 1716 | @article{s21175801, |
1727 | 1717 | abstract = {Many approaches to time series classification rely on machine learning methods. However, there is growing interest in going beyond black box prediction models to understand discriminatory features of the time series and their associations with outcomes. One promising method is time-series shapelets (TSS), which identifies maximally discriminative subsequences of time series. For example, in environmental health applications TSS could be used to identify short-term patterns in exposure time series (shapelets) associated with adverse health outcomes. Identification of candidate shapelets in TSS is computationally intensive. The original TSS algorithm used exhaustive search. Subsequent algorithms introduced efficiencies by trimming/aggregating the set of candidates or training candidates from initialized values, but these approaches have limitations. In this paper, we introduce Wavelet-TSS (W-TSS) a novel intelligent method for identifying candidate shapelets in TSS using wavelet transformation discovery. We tested W-TSS on two datasets: (1) a synthetic example used in previous TSS studies and (2) a panel study relating exposures from residential air pollution sensors to symptoms in participants with asthma. Compared to previous TSS algorithms, W-TSS was more computationally efficient, more accurate, and was able to discover more discriminative shapelets. W-TSS does not require pre-specification of shapelet length.}, |
1728 | 1718 | article-number = {5801}, |
|