-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcore_api.py
More file actions
60 lines (51 loc) · 2.32 KB
/
Copy pathcore_api.py
File metadata and controls
60 lines (51 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from __future__ import annotations
import json
import os
from typing import Any
import httpx
from config import get_http_client
# Base URL of the CORE v3 REST API and the works-search endpoint built on it.
CORE_API = "https://api.core.ac.uk/v3"
CORE_SEARCH_WORKS = f"{CORE_API}/search/works/"

# API key read once at import time. It is stripped here so that a value that
# is only whitespace, or that carries a trailing newline (common when the
# variable is populated from a secrets file), is never placed verbatim into
# an Authorization header.
CORE_API_KEY = os.environ.get("CORE_API_KEY", "").strip()

# True when a non-blank API key was configured.
CORE_API_AVAILABLE = bool(CORE_API_KEY)

# Comma-separated list of work fields.
# NOTE(review): not referenced by the code visible in this file section;
# presumably consumed elsewhere - confirm before removing.
FIELDS = "id,title,yearPublished,authors,doi,abstract,downloadUrl,fullText,documentType,subjects,publisher,language,identifiers,links"
def _extract_work(w: dict) -> dict:
return {
"id": w.get("id", ""),
"title": w.get("title", "") or "",
"year": w.get("yearPublished", ""),
"authors": [a.get("name", "") for a in (w.get("authors") or [])],
"doi": w.get("doi", ""),
"abstract": (w.get("abstract", "") or "")[:1000],
"downloadUrl": w.get("downloadUrl", ""),
"hasFullText": bool(w.get("fullText")),
"documentType": w.get("documentType", []),
"subjects": w.get("subjects", []),
"publisher": w.get("publisher", ""),
"language": w.get("language", {}).get("name", "") if isinstance(w.get("language"), dict) else "",
"ids": w.get("identifiers", {}),
}
async def search_core_works(query: str, limit: int = 10, offset: int = 0) -> dict:
    """Search CORE works and return a result envelope.

    Args:
        query: Search expression sent as the ``q`` parameter.
        limit: Requested page size; clamped to the range 0..100.
        offset: Index of the first hit; negative values are clamped to 0.

    Returns:
        On success ``{"success": True, "results": [...], "totalHits": ...,
        "searchId": ...}`` where each result has been passed through
        ``_extract_work``. On any failure ``{"success": False, "error": str,
        "results": []}``. HTTP and JSON-decoding errors are reported through
        the envelope rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin, urlsplit

    redirect_statuses = (301, 302, 303, 307, 308)
    timeout_seconds = 15

    try:
        params: dict[str, Any] = {
            "q": query,
            # Clamp both ends, mirroring the lower clamp already on offset.
            "limit": max(0, min(limit, 100)),
            "offset": max(offset, 0),
        }
        headers = {"Accept": "application/json"}
        if CORE_API_KEY:
            headers["Authorization"] = f"Bearer {CORE_API_KEY}"

        c = get_http_client()
        r = await c.get(CORE_SEARCH_WORKS, params=params, headers=headers, timeout=timeout_seconds)

        # Follow a single redirect hop by hand.
        if r.status_code in redirect_statuses:
            location = r.headers.get("location", "")
            if location:
                # Location may be relative; resolve it against the request URL.
                target = urljoin(CORE_SEARCH_WORKS, location)
                source_parts = urlsplit(CORE_SEARCH_WORKS)
                target_parts = urlsplit(target)
                same_origin = (
                    target_parts.scheme.lower() == source_parts.scheme.lower()
                    and target_parts.netloc.lower() == source_parts.netloc.lower()
                )
                follow_headers = headers
                if not same_origin:
                    # Never forward the API key to a different origin.
                    follow_headers = {
                        name: value
                        for name, value in headers.items()
                        if name != "Authorization"
                    }
                r = await c.get(target, headers=follow_headers, timeout=timeout_seconds)

        if r.status_code != 200:
            return {"success": False, "error": f"CORE API: {r.status_code}", "results": []}

        data = r.json()
        if not isinstance(data, dict):
            # A non-object body would otherwise raise AttributeError below,
            # which the except clause does not cover.
            return {"success": False, "error": "CORE API: unexpected response format", "results": []}

        # Skip entries that are not JSON objects instead of crashing on them.
        works = [
            _extract_work(item)
            for item in (data.get("results", []) or [])
            if isinstance(item, dict)
        ]
        return {
            "success": True,
            "results": works,
            "totalHits": data.get("totalHits", 0),
            "searchId": data.get("searchId", ""),
        }
    except (httpx.HTTPError, ValueError, KeyError, json.JSONDecodeError) as e:
        return {"success": False, "error": f"CORE: {e}", "results": []}