
Commit 34102d4

Fix documentation inconsistencies with actual code and add missing linkify-it-py dependency
1 parent ffc9eb7

6 files changed

Lines changed: 341 additions & 120 deletions


docs/analytics.rst

Lines changed: 31 additions & 13 deletions
@@ -93,33 +93,51 @@ Parquet
 
 .. code-block:: python
 
-    await client.analytics.to_parquet(query, "output.parquet")
+    from pydhis2.core.types import ExportFormat
+
+    await client.analytics.export_to_file(
+        query,
+        "output.parquet",
+        format=ExportFormat.PARQUET
+    )
 
-Arrow
-~~~~~
+CSV
+~~~
 
 .. code-block:: python
 
-    table = await client.analytics.to_arrow(query)
+    from pydhis2.core.types import ExportFormat
+
+    await client.analytics.export_to_file(
+        query,
+        "output.csv",
+        format=ExportFormat.CSV
+    )
 
-CSV
-~~~
+Arrow
+~~~~~
 
 .. code-block:: python
 
-    df = await client.analytics.to_pandas(query)
-    df.to_csv("output.csv", index=False)
+    table = await client.analytics.to_arrow(query)
+    print(table.schema)
 
-Pagination
-----------
+Pagination and Streaming
+------------------------
 
 For large datasets:
 
 .. code-block:: python
 
-    async for page in client.analytics.stream(query, page_size=1000):
-        print(f"Processing {len(page)} records")
-        # Process each page
+    async with AsyncDHIS2Client(config) as client:
+        async for page_df in client.analytics.stream_paginated(
+            query,
+            page_size=1000,
+            max_pages=10
+        ):
+            print(f"Processing {len(page_df)} records")
+            # Process each page DataFrame
+            # page_df is a pandas DataFrame
 
 Filters
 -------
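
Note: the updated analytics docs converge Parquet and CSV output on a single export_to_file call. A minimal end-to-end sketch of the usage documented above, assuming a reachable DHIS2 instance with default config; dx_id and ou_id are hypothetical placeholder UIDs:

    import asyncio

    from pydhis2 import get_client, DHIS2Config
    from pydhis2.core.types import AnalyticsQuery, ExportFormat

    async def main() -> None:
        AsyncDHIS2Client, _ = get_client()
        config = DHIS2Config()  # assumes credentials come from defaults/env

        async with AsyncDHIS2Client(config) as client:
            # Hypothetical placeholder UIDs; substitute real dimension IDs
            query = AnalyticsQuery(dx=["dx_id"], ou="ou_id", pe="2023")

            # Same call for both formats, switched by the ExportFormat enum
            await client.analytics.export_to_file(
                query, "output.parquet", format=ExportFormat.PARQUET
            )
            await client.analytics.export_to_file(
                query, "output.csv", format=ExportFormat.CSV
            )

    asyncio.run(main())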

docs/datavaluesets.rst

Lines changed: 97 additions & 27 deletions
@@ -3,8 +3,8 @@ DataValueSets
 
 The DataValueSets endpoint allows you to read and write individual data values.
 
-Reading Data Values
--------------------
+Pulling (Reading) Data Values
+-----------------------------
 
 .. code-block:: python
 
@@ -14,46 +14,116 @@ Reading Data Values
     config = DHIS2Config()
 
     async with AsyncDHIS2Client(config) as client:
-        data = await client.datavaluesets.get(
-            dataSet="dataSetId",
-            orgUnit="orgUnitId",
+        # Pull data values - returns DataFrame directly
+        df = await client.datavaluesets.pull(
+            data_set="dataSetId",
+            org_unit="orgUnitId",
             period="202301"
         )
-        df = data.to_pandas()
         print(df)
+
+        # Pull with date range
+        df = await client.datavaluesets.pull(
+            data_set="dataSetId",
+            org_unit="orgUnitId",
+            start_date="2023-01-01",
+            end_date="2023-12-31",
+            children=True  # Include child org units
+        )
 
-Writing Data Values
--------------------
+Pushing (Writing) Data Values
+-----------------------------
 
 .. code-block:: python
 
-    data_values = {
-        "dataSet": "dataSetId",
-        "completeDate": "2023-01-31",
-        "period": "202301",
-        "orgUnit": "orgUnitId",
-        "dataValues": [
-            {
-                "dataElement": "dataElementId",
-                "value": "100"
-            }
-        ]
-    }
+    from pydhis2.core.types import ImportConfig, ImportStrategy
 
-    response = await client.datavaluesets.post(data_values)
-    print(response)
+    async with AsyncDHIS2Client(config) as client:
+        # Prepare data values
+        data_values = {
+            "dataSet": "dataSetId",
+            "completeDate": "2023-01-31",
+            "period": "202301",
+            "orgUnit": "orgUnitId",
+            "dataValues": [
+                {
+                    "dataElement": "dataElementId",
+                    "value": "100"
+                }
+            ]
+        }
+
+        # Push data
+        summary = await client.datavaluesets.push(
+            data_values,
+            config=ImportConfig(
+                strategy=ImportStrategy.CREATE_AND_UPDATE,
+                dry_run=False
+            )
+        )
+
+        print(f"Imported: {summary.imported}")
+        print(f"Updated: {summary.updated}")
+        print(f"Conflicts: {len(summary.conflicts)}")
+
+        # Check conflicts
+        if summary.has_conflicts:
+            conflicts_df = summary.conflicts_df
+            print(conflicts_df)
 
-Bulk Import
------------
+Bulk Import with Chunking
+-------------------------
 
-Import large datasets efficiently:
+Import large datasets efficiently with automatic chunking:
 
 .. code-block:: python
 
     import pandas as pd
+    from pydhis2.core.types import ImportConfig
 
-    df = pd.read_csv("data.csv")
+    async with AsyncDHIS2Client(config) as client:
+        # Read DataFrame
+        df = pd.read_csv("data.csv")
+
+        # Push with automatic chunking
+        summary = await client.datavaluesets.push(
+            df,
+            chunk_size=5000,  # Process 5000 records per chunk
+            config=ImportConfig(atomic=False)
+        )
+
+        print(f"Total imported: {summary.imported}")
+        print(f"Total updated: {summary.updated}")
+
+Streaming Large Datasets
+------------------------
+
+For very large datasets, stream in pages:
+
+.. code-block:: python
+
+    async with AsyncDHIS2Client(config) as client:
+        async for page_df in client.datavaluesets.pull_paginated(
+            data_set="dataSetId",
+            org_unit="orgUnitId",
+            page_size=5000
+        ):
+            print(f"Processing {len(page_df)} records")
+            # Process each page
+
+Export to File
+--------------
+
+.. code-block:: python
+
+    from pydhis2.core.types import ExportFormat
 
     async with AsyncDHIS2Client(config) as client:
-        await client.datavaluesets.bulk_import(df, chunk_size=1000)
+        await client.datavaluesets.export_to_file(
+            "datavalues.parquet",
+            format=ExportFormat.PARQUET,
+            data_set="dataSetId",
+            org_unit="orgUnitId",
+            period="202301"
+        )
 
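
Note: the pull/push API documented above composes into a simple round trip. A sketch under the same assumptions as the docs (placeholder IDs, default config); dry_run=True should make the push validate against the server without persisting anything:

    import asyncio

    from pydhis2 import get_client, DHIS2Config
    from pydhis2.core.types import ImportConfig, ImportStrategy

    async def round_trip() -> None:
        AsyncDHIS2Client, _ = get_client()
        config = DHIS2Config()

        async with AsyncDHIS2Client(config) as client:
            # Pull one data set / org unit / period as a DataFrame
            df = await client.datavaluesets.pull(
                data_set="dataSetId", org_unit="orgUnitId", period="202301"
            )

            # Validate a re-import as a dry run
            summary = await client.datavaluesets.push(
                df,
                config=ImportConfig(
                    strategy=ImportStrategy.CREATE_AND_UPDATE,
                    dry_run=True,
                ),
            )
            if summary.has_conflicts:
                print(summary.conflicts_df)

    asyncio.run(round_trip())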

docs/dqr.rst

Lines changed: 66 additions & 31 deletions
@@ -18,76 +18,111 @@ Basic Usage
 .. code-block:: python
 
     from pydhis2 import get_client, DHIS2Config
-    from pydhis2.dqr import DQRMetrics
+    from pydhis2.core.types import AnalyticsQuery
+    from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics
 
     AsyncDHIS2Client, _ = get_client()
     config = DHIS2Config()
 
     async with AsyncDHIS2Client(config) as client:
         # Fetch analytics data
-        query = AnalyticsQuery(dx=["..."], ou="...", pe="...")
+        query = AnalyticsQuery(dx=["indicator_id"], ou="org_unit_id", pe="2023")
         df = await client.analytics.to_pandas(query)
 
         # Run DQR analysis
-        dqr = DQRMetrics(df)
-        results = dqr.assess_all()
-        print(results)
+        completeness = CompletenessMetrics()
+        consistency = ConsistencyMetrics()
+        timeliness = TimelinessMetrics()
+
+        completeness_results = completeness.calculate(df)
+        consistency_results = consistency.calculate(df)
+        timeliness_results = timeliness.calculate(df)
+
+        for result in completeness_results + consistency_results + timeliness_results:
+            print(f"{result.metric_name}: {result.value:.2%} ({result.status})")
 
 Completeness Metrics
 --------------------
 
 .. code-block:: python
 
-    dqr = DQRMetrics(df)
+    from pydhis2.dqr.metrics import CompletenessMetrics
 
-    # Reporting completeness
-    completeness = dqr.reporting_completeness()
-    print(f"Completeness: {completeness:.2%}")
+    completeness = CompletenessMetrics()
+    results = completeness.calculate(df)
 
-    # Missing data analysis
-    missing = dqr.missing_data_analysis()
-    print(missing)
+    for result in results:
+        print(f"{result.metric_name}: {result.value:.2%}")
+        print(f"Status: {result.status}")
+        print(f"Message: {result.message}")
+        print(f"Details: {result.details}")
 
 Consistency Metrics
 -------------------
 
 .. code-block:: python
 
-    # Outlier detection
-    outliers = dqr.detect_outliers(threshold=3.0)
-    print(f"Found {len(outliers)} outliers")
+    from pydhis2.dqr.metrics import ConsistencyMetrics
+
+    consistency = ConsistencyMetrics()
+    results = consistency.calculate(df)
 
-    # Variance analysis
-    variance = dqr.variance_analysis()
-    print(variance)
+    for result in results:
+        print(f"{result.metric_name}: {result.value:.2%}")
+        if result.metric_name == "outlier_detection":
+            print(f"Outliers detected: {result.details.get('outlier_count')}")
 
 Timeliness Metrics
 ------------------
 
 .. code-block:: python
 
-    # Submission timeliness
-    timeliness = dqr.submission_timeliness()
-    print(f"On-time submissions: {timeliness:.2%}")
+    from pydhis2.dqr.metrics import TimelinessMetrics
+
+    timeliness = TimelinessMetrics()
+    results = timeliness.calculate(df)
+
+    for result in results:
+        print(f"{result.metric_name}: {result.value:.2%}")
+        print(f"Timely records: {result.details.get('timely_records')}/{result.details.get('total_records')}")
 
 Generating Reports
 ------------------
 
-HTML Report
-~~~~~~~~~~~
-
-.. code-block:: python
-
-    dqr.generate_report(output="dqr_report.html", format="html")
-
-JSON Summary
-~~~~~~~~~~~~
+Collect All Results
+~~~~~~~~~~~~~~~~~~~
 
 .. code-block:: python
 
     import json
+    from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics
+
+    # Calculate all metrics
+    completeness = CompletenessMetrics()
+    consistency = ConsistencyMetrics()
+    timeliness = TimelinessMetrics()
+
+    all_results = (
+        completeness.calculate(df) +
+        consistency.calculate(df) +
+        timeliness.calculate(df)
+    )
+
+    # Convert to summary dict
+    summary = {
+        "metrics": [
+            {
+                "name": r.metric_name,
+                "value": r.value,
+                "status": r.status,
+                "message": r.message,
+                "details": r.details
+            }
+            for r in all_results
+        ]
+    }
 
-    summary = dqr.summary()
+    # Save to JSON
     with open("dqr_summary.json", "w") as f:
         json.dump(summary, f, indent=2)
 
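
Note: because the three metric calculators return result objects with the same fields (metric_name, value, status, message, details), they compose into a simple quality gate. A sketch assuming only the attributes shown in the diff; the 0.8 cutoff is an illustrative choice, not a library default:

    from pydhis2.dqr.metrics import (
        CompletenessMetrics,
        ConsistencyMetrics,
        TimelinessMetrics,
    )

    def dqr_gate(df, threshold: float = 0.8) -> bool:
        """Return True when every DQR metric value meets the threshold."""
        all_results = (
            CompletenessMetrics().calculate(df)
            + ConsistencyMetrics().calculate(df)
            + TimelinessMetrics().calculate(df)
        )
        failing = [r for r in all_results if r.value < threshold]
        for r in failing:
            print(f"FAIL {r.metric_name}: {r.value:.2%} ({r.status})")
        return not failing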
