knowledge-computing
diff --git a/‎.github/workflows/deploy.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/deploy.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎_data/pub/dynamic.bib‎ ‎_data/pub/dynamic2.bib‎_data/pub/dynamic.bib renamed to _data/pub/dynamic2.bib b/‎_data/pub/dynamic.bib‎ ‎_data/pub/dynamic2.bib‎_data/pub/dynamic.bib renamed to _data/pub/dynamic2.bib
diff --git a/‎publications.bib‎
Lines changed: 1 addition & 1 deletion b/‎publications.bib‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts.zip‎
28.7 KB b/‎scripts.zip‎
28.7 KB
diff --git a/‎scripts/__init__.py‎
Lines changed: 0 additions & 1 deletion b/‎scripts/__init__.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎scripts/__pycache__/__init__.cpython-310.pyc‎
173 Bytes b/‎scripts/__pycache__/__init__.cpython-310.pyc‎
173 Bytes
diff --git a/‎scripts/__pycache__/_update_utils.cpython-310.pyc‎
1.49 KB b/‎scripts/__pycache__/_update_utils.cpython-310.pyc‎
1.49 KB
diff --git a/‎scripts/__pycache__/helpers.cpython-310.pyc‎
4.04 KB b/‎scripts/__pycache__/helpers.cpython-310.pyc‎
4.04 KB
diff --git a/‎scripts/_archived_full.py‎
Lines changed: 23 additions & 5 deletions b/‎scripts/_archived_full.py‎
Lines changed: 23 additions & 5 deletions
diff --git a/‎scripts/providers/__init__.py‎ ‎scripts/_archiveproviders/__init__.py‎scripts/providers/__init__.py renamed to scripts/_archiveproviders/__init__.py
Lines changed: 1 addition & 1 deletion b/‎scripts/providers/__init__.py‎ ‎scripts/_archiveproviders/__init__.py‎scripts/providers/__init__.py renamed to scripts/_archiveproviders/__init__.py
Lines changed: 1 addition & 1 deletion
@@ -36,15 +36,15 @@ jobs:
 
       - name: Annual update on bib files
         run: |
-          python scripts/migrate_files.py
+          PYTHONPATH=. python scripts/migrate_files.py
 
     #   - name: Update publications file
     #     run: |
-    #       python scripts/update_pubs.py
+    #       PYTHONPATH=. python scripts/update_pubs.py --scholar_id Xf3M93cAAAAJ
 
       - name: Create publications.bib
         run: |
-          python scripts/merge_bibs.py
+          PYTHONPATH=. python scripts/merge_bibs.py
 
       - name: Commit & push if changed
         run: |
 
@@ -1,5 +1,5 @@
 % AUTO-GENERATED FILE — DO NOT EDIT
-% Merged static.bib + dynamic.bib on 2026-02-04T20:18:07Z
+% Updated on 2026-02-04T22:27:41Z
 
 @inproceedings{10.1007/978-3-032-04617-8_3,
   abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},
 
@@ -26,10 +26,10 @@
 from tqdm import tqdm
 
 from scholarly import scholarly
-
-
 import bibtexparser
 from bibtexparser.bwriter import BibTexWriter
+
+
 # -----------------------------
 # Config
 # -----------------------------
@@ -66,6 +66,17 @@
 def now_year() -> int:
     return datetime.now(timezone.utc).year
 
+def clean_crossref_text(s: str) -> str:
+    """
+    Convert HTML entities (&lt; etc.) to characters, strip tags (<p>),
+    and normalize whitespace.
+    """
+    if not s:
+        return ""
+    s = html.unescape(s)          # &lt;p&gt; -> <p>
+    s = re.sub(r"<[^>]+>", " ", s)  # remove tags like <p>
+    return normalize_ws(s)
+
 
 def normalize_ws(s: str) -> str:
     return re.sub(r"\s+", " ", (s or "")).strip()
@@ -111,6 +122,7 @@ def token_set_ratio(a: str, b: str) -> float:
 def strip_html_tags(s: str) -> str:
     if not s:
         return ""
+    # Crossref abstracts are sometimes JATS-ish; strip tags crudely.
     s = re.sub(r"<[^>]+>", " ", s)
     s = html.unescape(s)
     return normalize_ws(s)
@@ -193,7 +205,6 @@ def parse_first_bibtex_entry(bibtex_str: str) -> dict:
             out[kk.lower()] = vv  # keep everything else
     return out
 
-
 def prefer_doi_key(entry: dict) -> None:
     """If doi exists, use it as BibTeX key (ID) like @...{10.1145/...,...}."""
     doi = (entry.get("doi") or "").strip()
@@ -319,15 +330,15 @@ def springer_bibtex_by_doi(doi: str) -> str:
 def get_bibtex_with_fallback(p_full: dict, title: str) -> str:
     # 1) Try directly
     try:
-        s = scholarly.bibtex(p_full)        # Get bibtext directly
+        s = scholarly.bibtex(p_full)
         if s:
             return s
     except Exception:
         pass
 
     # 2) Fallback: search by title
     try:
-        q = scholarly.search_pubs(title)    # Search by title
+        q = scholarly.search_pubs(title)
         pub2 = next(q, None)
         if not pub2:
             return ""
@@ -465,6 +476,10 @@ def build_entry_keep_all_fields(
         "url": link_fallback,
     }, overwrite=False)
 
+    # Strip HTML entities and tags from the title.
+    if entry.get("title"):
+        entry["title"] = clean_crossref_text(entry["title"])
+
     # Venue: only patch if missing in BOTH journal/booktitle
     if not (entry.get("journal") or entry.get("booktitle")):
         # choose booktitle for inproceedings, otherwise journal
@@ -720,6 +735,8 @@ def main():
         if y_int not in allowed_years:
             break
 
+        print(f"Doing for {idx}")
+
         authors = pick_authors(p_full)
         venue = pick_venue(p_full)
         link = pick_link(p_full)
@@ -853,6 +870,7 @@ def main():
                 best_doi = (best.get("DOI") or "").strip()
                 if best_doi:
                     crossref_bib = crossref_bibtex_transform(best_doi)
+                    # Also fetch the Crossref message (for the abstract and other fields).
                     try:
                         crossref_msg = crossref_lookup_by_doi(best_doi)
                     except Exception:
 
@@ -1,6 +1,6 @@
 from .arxiv import arxiv_entry, extract_arxiv_any
 from .acm import acm_bibtex_by_doi
 from .springer import springer_bibtex_by_doi
-from .crossref import crossref_entry
+from .crossref import crossref_lookup_by_doi, crossref_bibtex_by_doi, crossref_best
 from .scholarlyp import get_bibtex_with_fallback
 from .utils import build_entry_bibtex