-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_memory.html
More file actions
46 lines (38 loc) · 1.91 KB
/
test_memory.html
File metadata and controls
46 lines (38 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
<!doctype html>
<html>
<head>
<title>Memory Test</title>
</head>
<body>
<script>
// Back-of-the-envelope memory estimate for mixed-precision training of
// Llama 3.2 1B, followed by a rough PCIe bandwidth check.
//
// Units: every memory figure below is in GiB (2^30 bytes). The bandwidth
// figures at the end are in decimal GB/s (10^9 bytes per second) so that they
// match their 'GB/s' label and can be compared with the PCIe figure directly.
const BYTES_PER_GIB = 1024 ** 3
const BYTES_PER_GB = 1e9

// Parameter count in billions (scaled by 1e9 wherever it is used)
const model = { params: 1.2 }

// FP16 weights (mixed precision enabled by default): 2 bytes per parameter
const fp16Weights = (model.params * 1e9 * 2) / BYTES_PER_GIB
console.log('FP16 Weights:', fp16Weights, 'GiB')

// FP32 master weights for mixed precision: 4 bytes per parameter
const fp32MasterWeights = (model.params * 1e9 * 4) / BYTES_PER_GIB
console.log('FP32 Master Weights:', fp32MasterWeights, 'GiB')

// Gradients (FP16): same footprint as the FP16 weights
const gradients = fp16Weights
console.log('Gradients:', gradients, 'GiB')

// AdamW optimizer (2x for momentum + variance), each sized like the FP32 copy
const optimizer = fp32MasterWeights * 2
console.log('Optimizer:', optimizer, 'GiB')

// Total with mixed precision
const totalMixedPrecision = fp16Weights + fp32MasterWeights + gradients + optimizer
console.log('Total (Mixed Precision):', totalMixedPrecision, 'GiB')

// Activations (rough estimate for batch=4, seq=1024)
const activations = 2 // Conservative estimate, in GiB; a fixed guess, not derived from the model
console.log('Activations:', activations, 'GiB')

const grandTotal = totalMixedPrecision + activations
console.log('GRAND TOTAL:', grandTotal, 'GiB')

// Share of the card's memory, treating the advertised 48GB as 48 GiB
const gpuMemoryGiB = 48
console.log('On W7900 48GB:', ((grandTotal / gpuMemoryGiB) * 100).toFixed(1) + '%')

// PCIe bandwidth for this amount of data
const dataPerStep = totalMixedPrecision * 0.5 // Moving half the data per step (GiB)
const stepsPerSecond = 4

// Convert GiB -> GB before multiplying by the step rate. Without this the value
// is really GiB/s, which understates both the 'GB/s' figure and the PCIe
// utilization by about 7.4%.
const bandwidthNeeded = ((dataPerStep * BYTES_PER_GIB) / BYTES_PER_GB) * stepsPerSecond
console.log('Bandwidth needed:', bandwidthNeeded, 'GB/s')

// Nominal PCIe 3.0 link bandwidth in GB/s (presumably a x16 link; confirm for the target host)
const pcie3BandwidthGBps = 16
console.log('PCIe 3.0 utilization:', ((bandwidthNeeded / pcie3BandwidthGBps) * 100).toFixed(1) + '%')
</script>
</body>
</html>