2 changes: 1 addition & 1 deletion .github/workflows/tests-nightly.yml
@@ -20,7 +20,7 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos, windows]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.4.1", "2.10.0", "2.11.0"]
+        torch_version: ["2.4.1", "2.12.0", "nightly"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
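The matrix above expands to the cross product of `platform`, `cpu_type`, and `torch_version`, and the `exclude` list prunes invalid combinations such as aarch64 paired with torch 2.4.1. A hypothetical Python sketch of that expansion (the excluded pair is an assumption inferred from the comment, not copied from the workflow; `cpu_type` is omitted for brevity):

```python
from itertools import product

platforms = ["linux-x64", "linux-aarch64", "macos", "windows"]
torch_versions = ["2.4.1", "2.12.0", "nightly"]

# Assumption: mirrors the "aarch64 minimum torch version is 2.5.1" comment.
excluded = {("linux-aarch64", "2.4.1")}

# Expand the matrix, then drop excluded combinations.
jobs = [(p, v) for p, v in product(platforms, torch_versions) if (p, v) not in excluded]
print(len(jobs))  # 4 platforms x 3 versions = 12, minus 1 excluded -> 11
```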
2 changes: 1 addition & 1 deletion .github/workflows/tests-pr.yml
@@ -31,7 +31,7 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.4.1", "2.11.0"]
+        torch_version: ["2.4.1", "2.12.0"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
4 changes: 2 additions & 2 deletions bitsandbytes/backends/cpu/ops.py
@@ -153,9 +153,9 @@ def _(
         lambda: f"Blockwise 4bit dequantization only supports 16/32-bit floats, but got {dtype}",
     )
 
-    # Fallback as AVX512 implementation has accuracy issues with fp16/fp32 and blocksize >= 2048
+    # Fallback as AVX512 implementation has accuracy issues with blocksize >= 2048.
     # Note: this is not a common use case.
-    avx512_fallback = _has_avx512 and blocksize >= 2048 and dtype != torch.bfloat16
+    avx512_fallback = _has_avx512 and blocksize >= 2048
 
     # Odd shape is not supported by this kernel; fallback to generic implementation
     shape_fallback = shape[-1] % 2 != 0
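Read in isolation, the dispatch predicate after this change is: the AVX-512 kernel is bypassed for any dtype once blocksize >= 2048, whereas the old code bypassed it only for fp16/fp32. A minimal sketch of that predicate (not the library's actual dispatcher, just the condition the diff leaves behind):

```python
def should_use_generic_path(has_avx512: bool, blocksize: int, last_dim: int) -> bool:
    # AVX-512 path has accuracy issues at large block sizes, now for every dtype.
    avx512_fallback = has_avx512 and blocksize >= 2048
    # The vectorized kernel also cannot handle an odd last dimension.
    shape_fallback = last_dim % 2 != 0
    return avx512_fallback or shape_fallback
```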
9 changes: 7 additions & 2 deletions tests/test_linear4bit.py
@@ -365,8 +365,13 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     if fullgraph and torch.__version__ < (2, 8, 0, "dev"):
         pytest.skip("fullgraph mode requires torch 2.8 or higher")
 
-    if device == "cuda" and platform.system() == "Windows":
-        pytest.skip("Triton is not officially supported on Windows")
+    if platform.system() == "Windows":
+        if device == "cuda":
+            pytest.skip("Triton is not officially supported on Windows")
+        if device == "cpu" and torch.__version__ < (2, 7):
+            # torch.compile inductor on Windows CPU has include path bugs fixed in torch 2.7
+            # https://github.com/pytorch/pytorch/pull/148271
+            pytest.skip("torch.compile inductor on Windows CPU requires torch >= 2.7")
 
     # Has a strange regression on Linux aarch64 CPU in torch==2.6.0 when fullgraph=False.
     if (
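Comparisons like `torch.__version__ < (2, 7)` in these tests work because `torch.__version__` is a `TorchVersion`, a `str` subclass that supports rich comparison against version strings and int tuples. A quick standalone check, assuming only that torch is installed:

```python
import torch

# TorchVersion compares against tuples of ints as well as version strings.
print(type(torch.__version__))       # typically torch.torch_version.TorchVersion
print(torch.__version__ >= (2, 7))   # True on torch 2.7 and later
print(torch.__version__ >= "2.7.0")  # equivalent string form
```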
9 changes: 7 additions & 2 deletions tests/test_linear8bitlt.py
@@ -261,8 +261,13 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
     if fullgraph and torch.__version__ < (2, 5):
         pytest.skip("fullgraph tracing of MatmulLtState requires torch >= 2.5")
 
-    if device == "cuda" and platform.system() == "Windows":
-        pytest.skip("Triton is not officially supported on Windows")
+    if platform.system() == "Windows":
+        if device == "cuda":
+            pytest.skip("Triton is not officially supported on Windows")
+        if device == "cpu" and torch.__version__ < (2, 7):
+            # torch.compile inductor on Windows CPU has include path bugs fixed in torch 2.7
+            # https://github.com/pytorch/pytorch/pull/148271
+            pytest.skip("torch.compile inductor on Windows CPU requires torch >= 2.7")
 
     if device == "cuda" and mode == "reduce-overhead" and fullgraph and threshold > 0 and torch.__version__ >= (2, 10):
         pytest.xfail("Failure due to regression in torch 2.10 related to reduced overhead mode and CUDA.")
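The mix of `pytest.skip` and `pytest.xfail` in the hunks above is deliberate: `skip` aborts the test and reports it as SKIPPED, while imperative `xfail` also stops the test but records it as an expected failure (XFAIL). A minimal hypothetical example, not taken from this suite:

```python
import platform

import pytest
import torch

def test_example():
    if platform.system() == "Windows":
        pytest.skip("environment cannot run this test")  # reported as SKIPPED
    if torch.__version__ >= (2, 10):
        pytest.xfail("known upstream regression")        # reported as XFAIL
    assert True
```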