-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy path.env.example
More file actions
99 lines (89 loc) · 2.43 KB
/
.env.example
File metadata and controls
99 lines (89 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Dnet Configuration
# This example file is auto-generated from the settings definitions - DO NOT EDIT IT MANUALLY.
# To configure dnet, copy this file to .env and modify the copy as needed.
# === Logging ===
# Log level. Listed values: DEBUG, INFO, WARNING, ERROR, CRITICAL.
DNET_LOG=INFO
# Enable profile logging (true/false).
# Setting this to true also enables profiling (see DNET_OBS_ENABLED).
DNET_PROFILE=false
# === Observability / Profiling ===
# Enable profiling (true/false). Profiling is also enabled when DNET_PROFILE=true.
DNET_OBS_ENABLED=false
# Synchronize after each layer computation (true/false).
DNET_OBS_SYNC_PER_LAYER=false
# Synchronize every N iterations. 0 disables this periodic synchronization.
DNET_OBS_SYNC_EVERY_N=0
# === API Server ===
# HTTP server port for the API server.
DNET_API_HTTP_PORT=8080
# gRPC callback port for the API server.
DNET_API_GRPC_PORT=50051
# Compression percentage.
# NOTE(review): the valid range is not stated here (0-1 fraction vs. 0-100 percent) - confirm against the settings definition.
DNET_API_COMPRESSION_PCT=0.0
# Maximum number of concurrent requests.
DNET_API_MAX_CONCURRENT_REQUESTS=100
# Discovery port. 0 means the port is chosen dynamically.
DNET_API_DISCOVERY_PORT=0
# === Shard Server ===
# HTTP server port for the shard server (a separate setting from DNET_API_HTTP_PORT).
DNET_SHARD_HTTP_PORT=8081
# gRPC server port for the shard server (a separate setting from DNET_API_GRPC_PORT).
DNET_SHARD_GRPC_PORT=50052
# Activation queue size.
DNET_SHARD_QUEUE_SIZE=256
# Custom shard name. Empty by default.
# NOTE(review): presumably a name is chosen automatically when this is left empty - confirm.
DNET_SHARD_NAME=
# === Topology ===
# NOTE(review): "window" is not defined in this file - presumably a group of model layers; confirm.
# Number of resident windows.
DNET_TOPOLOGY_RESIDENT_WINDOWS=1
# Number of warmup windows.
DNET_TOPOLOGY_WARMUP_WINDOWS=1
# Enable extended statistics (true/false).
DNET_TOPOLOGY_X_STATS=false
# === Transport ===
# Wire data type (dtype) for activations.
# NOTE(review): the other accepted dtypes are not listed here - confirm against the settings definition.
DNET_TRANSPORT_WIRE_DTYPE=fp16
# Enable streaming transport (true/false).
DNET_TRANSPORT_STREAMING=true
# Stream backoff, in seconds.
DNET_TRANSPORT_STREAM_BACKOFF_S=0.5
# Stream idle timeout, in seconds.
DNET_TRANSPORT_STREAM_IDLE_S=2.0
# Number of send retries.
DNET_TRANSPORT_SEND_RETRIES=3
# Enable compression (true/false).
DNET_TRANSPORT_COMPRESS=false
# Minimum size, in bytes, for compression (65536 bytes = 64 KiB).
DNET_TRANSPORT_COMPRESS_MIN_BYTES=65536
# === Compute ===
# Prefetch mode for layer loading. Listed values: off, sync, async.
DNET_COMPUTE_PREFETCH_MODE=off
# Use the MLX fast load path (true/false).
DNET_COMPUTE_MXLOAD_FASTPATH=true
# Input memory pool size, in MB.
DNET_COMPUTE_INPUT_POOL_MB=512
# Output memory pool size, in MB.
DNET_COMPUTE_OUTPUT_POOL_MB=512
# === KV Cache ===
# Cache quantization mode.
# NOTE(review): the other accepted modes are not listed here - confirm against the settings definition.
DNET_KV_MODE=8bit
# Number of bits for the quantization mode.
# NOTE(review): how this interacts with DNET_KV_MODE is not stated here - confirm.
DNET_KV_BITS=8
# Quantization group size.
DNET_KV_GROUP_SIZE=64
# KV cache time-to-live (TTL), in seconds.
DNET_KV_TTL_S=30.0
# === gRPC ===
# Maximum gRPC message length (67108864 = 64 * 1024 * 1024).
# NOTE(review): presumably in bytes, i.e. 64 MiB - confirm.
DNET_GRPC_MAX_MESSAGE_LENGTH=67108864
# Maximum number of concurrent streams.
DNET_GRPC_MAX_CONCURRENT_STREAMS=1024
# Keepalive interval, in milliseconds (120000 ms = 2 minutes).
DNET_GRPC_KEEPALIVE_TIME_MS=120000
# Keepalive timeout, in milliseconds (20000 ms = 20 seconds).
DNET_GRPC_KEEPALIVE_TIMEOUT_MS=20000
# === Storage ===
# NOTE(review): both paths start with "~"; this assumes the application expands it to the
# user's home directory, since .env loaders typically pass the value through as-is - confirm.
# Directory for repacked layers.
DNET_REPACK_DIR=~/.dria/dnet/repacked_layers
# Directory for log files.
DNET_LOG_DIR=~/.dria/dnet/logs