Skip to content

Commit 15ebf91

Browse files
committed
code cleaning
1 parent a0dce04 commit 15ebf91

32 files changed

+1448
-275
lines changed

.github/workflows/deploy.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ jobs:
3636
3737
- name: Annual update on bib files
3838
run: |
39-
python scripts/migrate_files.py
39+
PYTHONPATH=. python scripts/migrate_files.py
4040
4141
# - name: Update publications file
4242
# run: |
43-
# python scripts/update_pubs.py
43+
# PYTHONPATH=. python scripts/update_pubs.py --scholar_id Xf3M93cAAAAJ
4444

4545
- name: Create publications.bib
4646
run: |
47-
python scripts/merge_bibs.py
47+
PYTHONPATH=. python scripts/merge_bibs.py
4848
4949
- name: Commit & push if changed
5050
run: |

publications.bib

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
% AUTO-GENERATED FILE — DO NOT EDIT
2-
% Merged static.bib + dynamic.bib on 2026-02-04T20:18:07Z
2+
% Updated on 2026-02-04T22:27:41Z
33
44
@inproceedings{10.1007/978-3-032-04617-8_3,
55
abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},

scripts.zip

28.7 KB
Binary file not shown.

scripts/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.
173 Bytes
Binary file not shown.
1.49 KB
Binary file not shown.
4.04 KB
Binary file not shown.

scripts/_archived_full.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
from tqdm import tqdm
2727

2828
from scholarly import scholarly
29-
30-
3129
import bibtexparser
3230
from bibtexparser.bwriter import BibTexWriter
31+
32+
3333
# -----------------------------
3434
# Config
3535
# -----------------------------
@@ -66,6 +66,17 @@
6666
def now_year() -> int:
6767
return datetime.now(timezone.utc).year
6868

69+
def clean_crossref_text(s: str) -> str:
70+
"""
71+
Convert HTML entities (&lt; etc.) to characters, strip tags (<p>),
72+
and normalize whitespace.
73+
"""
74+
if not s:
75+
return ""
76+
s = html.unescape(s) # &lt;p&gt; -> <p>
77+
s = re.sub(r"<[^>]+>", " ", s) # remove tags like <p>
78+
return normalize_ws(s)
79+
6980

7081
def normalize_ws(s: str) -> str:
7182
return re.sub(r"\s+", " ", (s or "")).strip()
@@ -111,6 +122,7 @@ def token_set_ratio(a: str, b: str) -> float:
111122
def strip_html_tags(s: str) -> str:
112123
if not s:
113124
return ""
125+
# Crossref abstracts are sometimes JATS-ish; strip tags crudely.
114126
s = re.sub(r"<[^>]+>", " ", s)
115127
s = html.unescape(s)
116128
return normalize_ws(s)
@@ -193,7 +205,6 @@ def parse_first_bibtex_entry(bibtex_str: str) -> dict:
193205
out[kk.lower()] = vv # keep everything else
194206
return out
195207

196-
197208
def prefer_doi_key(entry: dict) -> None:
198209
"""If doi exists, use it as BibTeX key (ID) like @...{10.1145/...,...}."""
199210
doi = (entry.get("doi") or "").strip()
@@ -319,15 +330,15 @@ def springer_bibtex_by_doi(doi: str) -> str:
319330
def get_bibtex_with_fallback(p_full: dict, title: str) -> str:
320331
# 1) Try directly
321332
try:
322-
s = scholarly.bibtex(p_full) # Get bibtext directly
333+
s = scholarly.bibtex(p_full)
323334
if s:
324335
return s
325336
except Exception:
326337
pass
327338

328339
# 2) Fallback: search by title
329340
try:
330-
q = scholarly.search_pubs(title) # Search by title
341+
q = scholarly.search_pubs(title)
331342
pub2 = next(q, None)
332343
if not pub2:
333344
return ""
@@ -465,6 +476,10 @@ def build_entry_keep_all_fields(
465476
"url": link_fallback,
466477
}, overwrite=False)
467478

479+
# Strip HTML entities and tags from the title
480+
if entry.get("title"):
481+
entry["title"] = clean_crossref_text(entry["title"])
482+
468483
# Venue: only patch if missing in BOTH journal/booktitle
469484
if not (entry.get("journal") or entry.get("booktitle")):
470485
# choose booktitle for inproceedings, otherwise journal
@@ -720,6 +735,8 @@ def main():
720735
if y_int not in allowed_years:
721736
break
722737

738+
print(f"Doing for {idx}")
739+
723740
authors = pick_authors(p_full)
724741
venue = pick_venue(p_full)
725742
link = pick_link(p_full)
@@ -853,6 +870,7 @@ def main():
853870
best_doi = (best.get("DOI") or "").strip()
854871
if best_doi:
855872
crossref_bib = crossref_bibtex_transform(best_doi)
873+
# Also fetch the Crossref message for this DOI (e.g. for the abstract).
856874
try:
857875
crossref_msg = crossref_lookup_by_doi(best_doi)
858876
except Exception:
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .arxiv import arxiv_entry, extract_arxiv_any
22
from .acm import acm_bibtex_by_doi
33
from .springer import springer_bibtex_by_doi
4-
from .crossref import crossref_entry
4+
from .crossref import crossref_lookup_by_doi, crossref_bibtex_by_doi, crossref_best
55
from .scholarlyp import get_bibtex_with_fallback
66
from .utils import build_entry_bibtex

0 commit comments

Comments
 (0)