python-sdk/examples/metrics_example.py at main · conductor-oss/python-sdk · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
"""
Example demonstrating Prometheus metrics collection and HTTP endpoint exposure.

This example shows how to:
- Enable Prometheus metrics collection for task execution
- Expose metrics via HTTP endpoint for scraping (served from memory)
- Track task poll times, execution times, errors, and more
- Integrate with Prometheus monitoring

Metrics collected:
- task_poll_total: Total number of task polls
- task_poll_time_seconds: Task poll duration
- task_execute_time_seconds: Task execution duration
- task_execute_error_total: Total task execution errors
- task_result_size_bytes: Task result payload size
- http_api_client_request: API request duration with quantiles

HTTP Mode vs File Mode:
- With http_port: Metrics served from memory at /metrics endpoint (no file written)
- Without http_port: Metrics written to file (no HTTP server)

Usage:
    1. Run this example: python3 metrics_example.py
    2. View metrics: curl http://localhost:8000/metrics
    3. Configure Prometheus to scrape: http://localhost:8000/metrics
"""

from conductor.client.automator.task_handler import TaskHandler
from conductor.client.configuration.configuration import Configuration
from conductor.client.configuration.settings.metrics_settings import MetricsSettings
from conductor.client.worker.worker_task import worker_task


# Example worker tasks (same as async_worker_example.py)

@worker_task(
    task_definition_name='async_http_task',
    thread_count=10,
    poll_timeout=10
)
async def async_http_worker(url: str = 'https://api.example.com/data', delay: float = 0.1) -> dict:
    """
    Pretend to call an HTTP endpoint without blocking the event loop.

    The "request" is just an awaited sleep, so this worker is a stand-in for
    any I/O-bound task whose metrics you want to observe.

    Args:
        url: Address echoed back in the result; no request is actually sent.
        delay: Seconds to sleep in place of the network round trip.

    Returns:
        A dict with the keys 'url', 'status' (always 'success') and
        'timestamp' (local time in ISO-8601 form, taken after the sleep).
    """
    import asyncio
    import datetime as _dt

    # Stand-in for the awaited network call
    await asyncio.sleep(delay)

    finished_at = _dt.datetime.now()
    return dict(
        url=url,
        status='success',
        timestamp=finished_at.isoformat(),
    )


@worker_task(
    task_definition_name='async_data_processor',
    thread_count=10,
    poll_timeout=10
)
async def async_data_processor(data: str, process_time: float = 0.5) -> dict:
    """
    Upper-case a string after an awaited, simulated processing delay.

    Args:
        data: Text to transform.
        process_time: Seconds to sleep in place of real processing work.

    Returns:
        A dict with the keys 'original' (the input), 'processed' (the
        upper-cased input), 'length' (length of the processed text) and
        'processed_at' (local time in ISO-8601 form).
    """
    import asyncio
    import datetime as _dt

    # Stand-in for the awaited processing step
    await asyncio.sleep(process_time)

    shouted = data.upper()

    payload = {'original': data, 'processed': shouted}
    payload['length'] = len(shouted)
    payload['processed_at'] = _dt.datetime.now().isoformat()
    return payload


@worker_task(
    task_definition_name='async_batch_processor',
    thread_count=5,
    poll_timeout=10
)
async def async_batch_processor(items: list) -> dict:
    """
    Handle every item of a batch concurrently via asyncio.gather.

    Each item is "processed" by an awaited 0.1 second sleep, and all of the
    per-item coroutines are awaited together rather than one after another.

    Args:
        items: The batch to process.

    Returns:
        A dict with the keys 'input_count' (number of items), 'results'
        (one "processed_<item>" string per item, in input order) and
        'completed_at' (local time in ISO-8601 form).
    """
    import asyncio
    import datetime as _dt

    async def _handle(entry):
        # Stand-in for a per-item I/O operation
        await asyncio.sleep(0.1)
        return "processed_{}".format(entry)

    # One coroutine per item, all awaited together
    outcomes = await asyncio.gather(*map(_handle, items))

    summary = {'input_count': len(items), 'results': outcomes}
    summary['completed_at'] = _dt.datetime.now().isoformat()
    return summary


@worker_task(
    task_definition_name='sync_cpu_task',
    thread_count=5,
    poll_timeout=10
)
def sync_cpu_worker(n: int = 100000) -> dict:
    """
    Plain synchronous worker that does a CPU-bound calculation.

    Adds up the squares of every integer from 0 up to (but not including) n.
    Synchronous workers suit CPU-bound work (calculations, parsing, etc.);
    async workers suit I/O-bound work (network, database, files).

    Args:
        n: Exclusive upper bound of the range whose squares are summed.

    Returns:
        A dict with the single key 'result' holding the sum.
    """
    total = 0
    for value in range(n):
        total += value * value

    return {'result': total}

# Note: The HTTP server is now built into MetricsCollector.
# Simply specify http_port in MetricsSettings to enable it.


def _print_banner(http_port):
    """Print the startup banner: endpoints, available workers and sample commands."""
    rule = "=" * 80
    # Build the base URL once instead of re-formatting it for every line.
    base_url = f"http://localhost:{http_port}"

    banner_lines = [
        rule,
        "Metrics Collection Example",
        rule,
        "",
        "This example demonstrates Prometheus metrics collection and exposure.",
        "",
        "Metrics mode: HTTP (served from memory)",
        f"Metrics HTTP endpoint: {base_url}/metrics",
        f"Health check: {base_url}/health",
        "Note: Metrics are NOT written to file when http_port is specified",
        "",
        "Workers available:",
        "  - async_http_task: Async HTTP simulation (I/O-bound)",
        "  - async_data_processor: Async data processing",
        "  - async_batch_processor: Concurrent batch processing",
        "  - sync_cpu_task: Synchronous CPU-bound calculations",
        "",
        "Try these commands:",
        f"  curl {base_url}/metrics",
        f"  watch -n 1 'curl -s {base_url}/metrics | grep task_poll_total'",
        "",
        "Press Ctrl+C to stop...",
        rule,
        "",
    ]
    print("\n".join(banner_lines))


def main():
    """
    Run the example with metrics collection enabled.

    Builds the metrics settings and Conductor configuration, prints a usage
    banner, then starts the task handler and blocks until its worker
    processes finish or the user presses Ctrl+C.

    Raises:
        Exception: Any non-KeyboardInterrupt error raised while the task
            handler is running is reported on stdout and then re-raised.
    """
    # Configure metrics collection.
    # The HTTP server is built in - specifying http_port is all that is needed.
    metrics_settings = MetricsSettings(
        directory="/tmp/conductor-metrics",  # Temp directory for metrics .db files
        file_name="metrics.log",             # Metrics file name (for file-based access)
        update_interval=0.1,                 # Update every 100ms
        http_port=8000,                      # Expose metrics via HTTP on port 8000
    )

    # Configure Conductor connection
    config = Configuration()

    _print_banner(metrics_settings.http_port)

    try:
        # Create the task handler with metrics enabled; the context manager
        # takes care of tearing the handler down when the block exits.
        with TaskHandler(
            configuration=config,
            metrics_settings=metrics_settings,
            scan_for_annotated_workers=True
        ) as task_handler:
            task_handler.start_processes()
            task_handler.join_processes()

    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the example, not an error.
        print("\n\nShutting down gracefully...")

    except Exception as e:
        # Report the failure to the user, then let it propagate unchanged.
        print(f"\nError: {e}")
        raise

    print("\nWorkers stopped. Goodbye!")


# Script entry point: runs the example only when this file is executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # main() only handles Ctrl+C around the task handler; this outer guard
        # avoids a traceback if the interrupt arrives elsewhere in main()
        # (for example during setup or while the banner is printed).
        pass