-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtf32.py
More file actions
24 lines (18 loc) · 688 Bytes
/
tf32.py
File metadata and controls
24 lines (18 loc) · 688 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import torch
# use HIPBLASLT_ALLOW_TF32=1
# Demo: run the same GPU matmul twice — once with TF32 allowed for
# float32 matmuls, once with it disabled (plain FP32) — and print both.
if not torch.cuda.is_available():
    print("CUDA is not available")
else:
    # Toggle applies only to matmul kernels; tensor creation is unaffected.
    torch.backends.cuda.matmul.allow_tf32 = True
    lhs = torch.randn(1000, 1000, device='cuda')
    rhs = torch.randn(1000, 1000, device='cuda')

    product = torch.matmul(lhs, rhs)
    print("TF32 is enabled for matrix multiplication ", product)

    # Switch back to strict FP32 and repeat the same multiplication.
    torch.backends.cuda.matmul.allow_tf32 = False
    product = torch.matmul(lhs, rhs)
    print("FP32 is enabled for matrix multiplication ", product)