Skip to content

Commit ca3298f

Browse files
committed
API: added /ready method + health tests.
1 parent f00caa7 commit ca3298f

3 files changed

Lines changed: 159 additions & 1 deletion

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ Check `http://localhost:8090/docs` for input information.
9595
The service exposes:
9696

9797
- *GET* `/api/health` - returns `{"status": "healthy"}`,
98+
- *GET* `/api/ready` - returns readiness for OCR processing (`200` when ready, `503` when not ready),
9899
- *GET* `/api/info` - returns information about the service with its configuration,
99100
- *POST* `/api/process` - processes a binary data stream with the binary document content ("Content-Type: application/octet-stream"), also accepts binary files directly via the 'file' parameter, if sending via curl. It
100101
- *POST* `/api/process_file` - processes a file via multipart/form-data,

ocr_service/api/health.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from fastapi import APIRouter
1+
from typing import Any
2+
3+
import psutil
4+
from fastapi import APIRouter, Request, status
25
from fastapi.responses import ORJSONResponse
36

47
from ocr_service.dto.info_response import InfoResponse
@@ -12,6 +15,69 @@ def health() -> ORJSONResponse:
1215
return ORJSONResponse(content={"status": "healthy"})
1316

1417

18+
def _libreoffice_process_issue(port: object, proc_data: object) -> str | None:
    """Return the first readiness problem found for one LibreOffice entry.

    Args:
        port: Key of the entry in ``loffice_process_list``; only used to
            label the returned issue string.
        proc_data: Value stored under that key. The checks read its
            ``"unhealthy"``, ``"process"`` and ``"pid"`` entries.

    Returns:
        An issue string of the form ``"<reason>:<port>"``, or ``None`` when
        every check passes.
    """
    if not isinstance(proc_data, dict):
        return f"libreoffice_process_invalid_metadata:{port}"

    if proc_data.get("unhealthy"):
        return f"libreoffice_process_marked_unhealthy:{port}"

    process_obj = proc_data.get("process")
    # Prefer the PID exposed by the process handle; fall back to the stored one.
    raw_pid = getattr(process_obj, "pid", None) or proc_data.get("pid")
    try:
        pid = int(raw_pid)
    except (TypeError, ValueError):
        return f"libreoffice_process_missing_pid:{port}"

    # A non-None poll() result is treated as "the process has already exited".
    # NOTE(review): presumably the handle is subprocess.Popen-like - confirm.
    if process_obj is not None and hasattr(process_obj, "poll") and process_obj.poll() is not None:
        return f"libreoffice_process_exited:{port}"

    if not psutil.pid_exists(pid):
        return f"libreoffice_process_pid_not_found:{port}"

    try:
        lo_process = psutil.Process(pid)
        if not lo_process.is_running():
            return f"libreoffice_process_not_running:{port}"
        if lo_process.status() == psutil.STATUS_ZOMBIE:
            return f"libreoffice_process_zombie:{port}"
    except psutil.Error:
        # Any psutil failure while inspecting the PID is reported, not raised.
        return f"libreoffice_process_not_accessible:{port}"

    return None


def _collect_readiness_issues(request: Request) -> list[str]:
    """Collect the reasons why the service is not ready for processing.

    Args:
        request: Incoming request; ``request.app.state.processor`` is read
            to find the tracked LibreOffice processes.

    Returns:
        A list of issue strings. It is empty when a processor is present,
        its ``loffice_process_list`` is a non-empty dict, and every entry
        passes the per-process checks. A missing processor or an
        empty/non-dict process list short-circuits with a single issue.
    """
    processor = getattr(request.app.state, "processor", None)
    if processor is None:
        return ["processor_not_initialized"]

    loffice_process_list = getattr(processor, "loffice_process_list", None)
    if not isinstance(loffice_process_list, dict) or not loffice_process_list:
        return ["libreoffice_process_list_empty"]

    # Each entry contributes at most one issue: the first check that fails.
    return [
        issue
        for port, proc_data in loffice_process_list.items()
        if (issue := _libreoffice_process_issue(port, proc_data)) is not None
    ]
66+
67+
68+
@health_api.get("/ready", response_class=ORJSONResponse)
def ready(request: Request) -> ORJSONResponse:
    # Readiness probe: answers 503 with the list of detected issues when any
    # check fails, otherwise reports "ready" plus the number of tracked
    # LibreOffice processes. Kept as comments rather than a docstring so the
    # generated OpenAPI description of the route is left untouched.
    issues = _collect_readiness_issues(request)

    if not issues:
        tracked = request.app.state.processor.loffice_process_list
        payload = {"status": "ready", "libreoffice_processes": len(tracked)}
        return ORJSONResponse(content=payload)

    return ORJSONResponse(
        content={"status": "not_ready", "issues": issues},
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
    )
79+
80+
1581
@health_api.get("/info", response_model=InfoResponse, response_class=ORJSONResponse)
def info() -> ORJSONResponse:
    # Return whatever get_app_info() produces as the JSON response body.
    app_info = get_app_info()
    return ORJSONResponse(content=app_info)

ocr_service/tests/test_health.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import unittest
2+
from unittest.mock import patch
3+
4+
from fastapi import FastAPI
5+
from fastapi.testclient import TestClient
6+
7+
from ocr_service.api.health import health_api
8+
9+
10+
class DummySubprocess:
11+
def __init__(self, pid: int, returncode: int | None = None) -> None:
12+
self.pid = pid
13+
self._returncode = returncode
14+
15+
def poll(self) -> int | None:
16+
return self._returncode
17+
18+
19+
class DummyPsutilProcess:
    """Double for a psutil process object with canned answers."""

    def __init__(self, running: bool, process_status: str) -> None:
        self._running, self._status = running, process_status

    def is_running(self) -> bool:
        """Return the ``running`` flag supplied at construction."""
        return self._running

    def status(self) -> str:
        """Return the ``process_status`` string supplied at construction."""
        return self._status
29+
30+
31+
class DummyProcessor:
    """Bare processor double that only carries ``loffice_process_list``."""

    def __init__(self, loffice_process_list) -> None:
        # Stored as given, with no copying or validation.
        self.loffice_process_list = loffice_process_list
34+
35+
36+
class TestHealthApi(unittest.TestCase):
    """Endpoint tests for the health router mounted on a throwaway app."""

    _PORT = "9900"
    _PID = 12345

    def setUp(self) -> None:
        self.app = FastAPI()
        self.app.include_router(health_api)
        self.client = TestClient(self.app)
        # Close the client once the test finishes.
        self.addCleanup(self.client.close)

    def _install_process(self, returncode: int | None) -> None:
        """Attach a processor that tracks a single dummy LibreOffice process."""
        entry = {
            "process": DummySubprocess(pid=self._PID, returncode=returncode),
            "pid": self._PID,
            "unhealthy": False,
        }
        self.app.state.processor = DummyProcessor({self._PORT: entry})

    def _assert_not_ready(self, response, expected_issue: str) -> None:
        """Check for a 503 "not_ready" answer that lists ``expected_issue``."""
        self.assertEqual(response.status_code, 503)
        body = response.json()
        self.assertEqual(body.get("status"), "not_ready")
        self.assertIn(expected_issue, body.get("issues", []))

    def test_health_returns_healthy(self):
        response = self.client.get("/api/health")

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json(), {"status": "healthy"})

    def test_ready_returns_503_when_processor_not_initialized(self):
        # No processor has been placed on app.state for this test.
        self._assert_not_ready(
            self.client.get("/api/ready"),
            "processor_not_initialized",
        )

    def test_ready_returns_503_when_libreoffice_process_exited(self):
        # A non-None return code makes the dummy handle look exited.
        self._install_process(returncode=1)

        self._assert_not_ready(
            self.client.get("/api/ready"),
            "libreoffice_process_exited:9900",
        )

    def test_ready_returns_200_for_running_libreoffice_process(self):
        self._install_process(returncode=None)
        live_process = DummyPsutilProcess(running=True, process_status="sleeping")

        with (
            patch("ocr_service.api.health.psutil.pid_exists", return_value=True),
            patch("ocr_service.api.health.psutil.Process", return_value=live_process),
        ):
            response = self.client.get("/api/ready")

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json(), {"status": "ready", "libreoffice_processes": 1})
91+

0 commit comments

Comments
 (0)