Skip to content

Commit 17cfdc7

Browse files
Merge pull request #564 from skmendez/feature/remove-cpuinfo
remove cpuinfo dependency and filesystem caching of get_cpu_info call
2 parents 12c4d4d + b1b6fcd commit 17cfdc7

3 files changed

Lines changed: 146 additions & 78 deletions

File tree

.guix/modules/python-blosc2-package.scm

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@
9494
(when tests?
9595
(invoke "env" "PYTHONPATH=." "pytest")))))))
9696
(inputs (list c-blosc2))
97-
(propagated-inputs (list python-msgpack python-ndindex python-numpy
98-
python-py-cpuinfo))
97+
(propagated-inputs (list python-msgpack python-ndindex python-numpy))
9998
(native-inputs (list cmake-minimal pkg-config python-cython-3
10099
python-pytest python-scikit-build))
101100
(home-page "https://github.com/blosc/python-blosc2")

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ dependencies = [
3636
"numpy>=1.26",
3737
"ndindex",
3838
"msgpack",
39-
"platformdirs",
4039
"numexpr>=2.14.1; platform_machine != 'wasm32'",
41-
"py-cpuinfo; platform_machine != 'wasm32'",
4240
"requests",
4341
]
4442
version = "4.0.0-b2.dev0"

src/blosc2/core.py

Lines changed: 145 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
# Avoid checking the name of type annotations at run time
99
from __future__ import annotations
1010

11-
import contextlib
1211
import copy
1312
import ctypes
1413
import ctypes.util
@@ -22,31 +21,21 @@
2221
import sys
2322
from dataclasses import asdict
2423
from functools import lru_cache
25-
from typing import TYPE_CHECKING, Any
24+
from typing import TYPE_CHECKING, ClassVar
2625

2726
import numpy as np
28-
import platformdirs
2927
import requests
3028

3129
import blosc2
3230
from blosc2 import blosc2_ext
3331

34-
if not blosc2.IS_WASM:
35-
import cpuinfo
36-
3732
if TYPE_CHECKING:
3833
from collections.abc import Callable
3934

4035
import tensorflow
4136
import torch
4237

4338

44-
_USER_CACHE_DIR: pathlib.Path = platformdirs.user_cache_path(
45-
appname="python-blosc2",
46-
appauthor="blosc",
47-
)
48-
49-
5039
def _check_typesize(typesize):
5140
if not 1 <= typesize <= blosc2_ext.MAX_TYPESIZE:
5241
raise ValueError(f"typesize can only be in the 1-{blosc2_ext.MAX_TYPESIZE} range.")
@@ -1151,11 +1140,12 @@ def print_versions():
11511140
print("-=" * 38)
11521141

11531142

1154-
def apple_silicon_cache_size(cache_level: int) -> int:
1143+
def apple_silicon_cache_size(cache_level: int) -> int | None:
11551144
"""Get the data cache_level size in bytes for Apple Silicon in MacOS.
11561145
11571146
Apple Silicon has two clusters, Performance (0) and Efficiency (1).
11581147
This function returns the data cache size for the Performance cluster.
1148+
Returns None if the cache size cannot be determined.
11591149
"""
11601150
libc = ctypes.CDLL(ctypes.util.find_library("c"))
11611151
size = ctypes.c_size_t()
@@ -1166,7 +1156,87 @@ def apple_silicon_cache_size(cache_level: int) -> int:
11661156
hwcachesize = f"hw.perflevel0.l{cache_level}cachesize"
11671157
hwcachesize = hwcachesize.encode("ascii")
11681158
libc.sysctlbyname(hwcachesize, ctypes.byref(size), ctypes.byref(ctypes.c_size_t(8)), None, 0)
1169-
return size.value
1159+
return size.value if size.value > 0 else None
1160+
1161+
1162+
def windows_cache_size(cache_level: int) -> int | None:
1163+
"""Get the data cache size in bytes for Windows.
1164+
1165+
Semantics:
1166+
- L1: data cache only
1167+
- L2/L3: unified cache (data + instruction), as no split exists
1168+
1169+
Returns None if the cache size cannot be determined.
1170+
"""
1171+
from ctypes import wintypes
1172+
1173+
if cache_level not in (1, 2, 3):
1174+
return None
1175+
1176+
# Windows constants
1177+
RelationCache = 2
1178+
1179+
# PROCESSOR_CACHE_TYPE enum values
1180+
CacheUnified = 0
1181+
CacheData = 2
1182+
1183+
# Header structure to read Relationship and Size first
1184+
class PROCESSOR_INFO_HEADER(ctypes.Structure):
1185+
_fields_: ClassVar[list] = [
1186+
("Relationship", ctypes.c_int),
1187+
("Size", ctypes.c_uint),
1188+
]
1189+
1190+
# Only the fields we need from CACHE_RELATIONSHIP (first 12 bytes)
1191+
class CACHE_RELATIONSHIP(ctypes.Structure):
1192+
_fields_: ClassVar[list] = [
1193+
("Level", ctypes.c_ubyte),
1194+
("Associativity", ctypes.c_ubyte),
1195+
("LineSize", ctypes.c_ushort),
1196+
("CacheSize", ctypes.c_uint),
1197+
("Type", ctypes.c_uint),
1198+
]
1199+
1200+
kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
1201+
1202+
size = wintypes.DWORD(0)
1203+
1204+
# Query buffer size
1205+
kernel32.GetLogicalProcessorInformationEx(
1206+
RelationCache,
1207+
None,
1208+
ctypes.byref(size),
1209+
)
1210+
1211+
buffer = ctypes.create_string_buffer(size.value)
1212+
1213+
# Retrieve cache info
1214+
kernel32.GetLogicalProcessorInformationEx(
1215+
RelationCache,
1216+
buffer,
1217+
ctypes.byref(size),
1218+
)
1219+
1220+
offset = 0
1221+
header_size = ctypes.sizeof(PROCESSOR_INFO_HEADER)
1222+
1223+
while offset < size.value:
1224+
# Read header to get Size for advancing offset
1225+
header = PROCESSOR_INFO_HEADER.from_buffer_copy(buffer[offset : offset + header_size])
1226+
1227+
if header.Relationship == RelationCache:
1228+
# Read cache info starting after the header
1229+
cache = CACHE_RELATIONSHIP.from_buffer_copy(buffer[offset + header_size :])
1230+
1231+
if cache.Level == cache_level and (
1232+
(cache_level == 1 and cache.Type == CacheData)
1233+
or (cache_level > 1 and cache.Type == CacheUnified)
1234+
):
1235+
return cache.CacheSize
1236+
1237+
offset += header.Size
1238+
1239+
return None
11701240

11711241

11721242
def get_cache_info(cache_level: int) -> tuple:
@@ -1197,19 +1267,21 @@ def get_cache_info(cache_level: int) -> tuple:
11971267
raise ValueError(f"L{cache_level} cache not found in lscpu output")
11981268

11991269

1200-
def linux_cache_size(cache_level: int, default_size: int) -> int:
1201-
"""Get the data cache_level size in bytes for Linux."""
1202-
cache_size = default_size
1270+
def linux_cache_size(cache_level: int) -> int | None:
1271+
"""Get the data cache_level size in bytes for Linux.
1272+
1273+
Returns None if the cache size cannot be determined.
1274+
"""
12031275
try:
12041276
# Try to read the cache size from sysfs
12051277
with open(f"/sys/devices/system/cpu/cpu0/cache/index{cache_level}/size") as f:
12061278
size = f.read()
12071279
if size.endswith("K\n"):
1208-
cache_size = int(size[:-2]) * 2**10
1280+
return int(size[:-2]) * 2**10
12091281
elif size.endswith("M\n"):
1210-
cache_size = int(size[:-2]) * 2**20
1282+
return int(size[:-2]) * 2**20
12111283
elif size.endswith("G\n"):
1212-
cache_size = int(size[:-2]) * 2**30
1284+
return int(size[:-2]) * 2**30
12131285
except FileNotFoundError:
12141286
# Try with lscpu, if available.
12151287
try:
@@ -1219,70 +1291,69 @@ def linux_cache_size(cache_level: int, default_size: int) -> int:
12191291
# In general, dividing the cache size by the number of instances would bring
12201292
# best performance for private caches (L1 and L2). For shared caches (L3),
12211293
# this should be the case as well, but more experimentation is needed.
1222-
cache_size //= cache_instances
1223-
return cache_size
1294+
return cache_size // cache_instances
12241295
except (FileNotFoundError, ValueError):
1225-
# If lscpu is not available or the cache size cannot be read from sysfs,
1226-
# return the default size.
12271296
pass
1228-
return cache_size
1297+
return None
12291298

12301299

1231-
def _get_cpu_info():
1232-
if blosc2.IS_WASM:
1233-
# Emscripten/wasm32 does not have access to CPU information.
1234-
# Populate it with some reasonable defaults.
1235-
return {
1236-
"brand": "Emscripten",
1237-
"arch": "wasm32",
1238-
"count": 1,
1239-
"l1_data_cache_size": 32 * 1024,
1240-
"l2_cache_size": 256 * 1024,
1241-
"l3_cache_size": 1024 * 1024,
1242-
}
1243-
cpu_info = cpuinfo.get_cpu_info()
1244-
# cpuinfo does not correctly retrieve the cache sizes for Apple Silicon, so do it manually
1245-
if platform.system() == "Darwin":
1246-
cpu_info["l1_data_cache_size"] = apple_silicon_cache_size(1)
1247-
cpu_info["l2_cache_size"] = apple_silicon_cache_size(2)
1248-
cpu_info["l3_cache_size"] = apple_silicon_cache_size(3)
1249-
# cpuinfo does not correctly retrieve the cache sizes for all CPUs on Linux, so ask the kernel
1250-
if platform.system() == "Linux":
1251-
l1_data_cache_size = cpu_info.get("l1_data_cache_size", 32 * 1024)
1252-
# Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache
1253-
cpu_info["l1_data_cache_size"] = linux_cache_size(0, l1_data_cache_size)
1254-
l2_cache_size = cpu_info.get("l2_cache_size", 256 * 1024)
1255-
cpu_info["l2_cache_size"] = linux_cache_size(2, l2_cache_size)
1256-
l3_cache_size = cpu_info.get("l3_cache_size", 1024 * 1024)
1257-
cpu_info["l3_cache_size"] = linux_cache_size(3, l3_cache_size)
1258-
return cpu_info
1259-
1300+
def _available_cpus() -> int:
1301+
try:
1302+
# On Linux, this returns the number of CPUs available to the process,
1303+
# which may be less than os.cpu_count() due to CPU affinity settings.
1304+
return len(os.sched_getaffinity(0))
1305+
except AttributeError:
1306+
# os.sched_getaffinity is not available on all platforms
1307+
return os.cpu_count() or 1
12601308

1261-
def write_cached_cpu_info(cpu_info_dict: dict[str, Any]) -> None:
1262-
_USER_CACHE_DIR.mkdir(parents=True, exist_ok=True)
1263-
with (_USER_CACHE_DIR / "cpuinfo.json").open("w") as f:
1264-
json.dump(cpu_info_dict, f, indent=4)
12651309

1310+
def _update_cache_sizes(
1311+
cpu_info: dict, cache_size_func: Callable[[int], int | None], levels: tuple[int, int, int]
1312+
) -> None:
1313+
"""Update cpu_info with cache sizes from the given function.
12661314
1267-
def read_cached_cpu_info() -> dict[str, Any]:
1268-
try:
1269-
with (_USER_CACHE_DIR / "cpuinfo.json").open() as f:
1270-
return json.load(f)
1271-
except (FileNotFoundError, json.JSONDecodeError):
1272-
return {}
1315+
Args:
1316+
cpu_info: Dictionary to update with cache sizes.
1317+
cache_size_func: Function that takes a cache level and returns size or None.
1318+
levels: Tuple of (l1_level, l2_level, l3_level) to pass to cache_size_func.
1319+
"""
1320+
l1_level, l2_level, l3_level = levels
1321+
if (l1_data_cache_size := cache_size_func(l1_level)) is not None:
1322+
cpu_info["l1_data_cache_size"] = l1_data_cache_size
1323+
if (l2_cache_size := cache_size_func(l2_level)) is not None:
1324+
cpu_info["l2_cache_size"] = l2_cache_size
1325+
if (l3_cache_size := cache_size_func(l3_level)) is not None:
1326+
cpu_info["l3_cache_size"] = l3_cache_size
12731327

12741328

12751329
@lru_cache(maxsize=1)
def get_cpu_info() -> dict:
    """Return CPU info: core count and L1/L2/L3 data cache sizes in bytes.

    Construct the result of cpuinfo.get_cpu_info(), without actually using
    cpuinfo.get_cpu_info() since that function takes 1s to run and this method is ran
    at import time.

    Starts from conservative defaults and overrides each cache size only when
    the platform-specific probe can determine it.  Memoized for the process
    lifetime via ``lru_cache``.
    """
    # Reasonable defaults; the per-platform probes below override what they can.
    cpu_info = {
        "count": _available_cpus(),
        "l1_data_cache_size": 32 * 1024,
        "l2_cache_size": 256 * 1024,
        "l3_cache_size": 1024 * 1024,
    }

    if blosc2.IS_WASM:
        # Emscripten/wasm32 does not have access to CPU information.
        # Return defaults.
        return cpu_info

    # Hoisted: avoids calling platform.system() once per elif branch.
    system = platform.system()
    if system == "Darwin":
        _update_cache_sizes(cpu_info, apple_silicon_cache_size, (1, 2, 3))
    elif system == "Linux":
        # Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache
        _update_cache_sizes(cpu_info, linux_cache_size, (0, 2, 3))
    elif system == "Windows":
        _update_cache_sizes(cpu_info, windows_cache_size, (1, 2, 3))

    return cpu_info
12861357

12871358

12881359
def get_blocksize() -> int:

0 commit comments

Comments
 (0)