88# Avoid checking the name of type annotations at run time
99from __future__ import annotations
1010
11- import contextlib
1211import copy
1312import ctypes
1413import ctypes .util
2221import sys
2322from dataclasses import asdict
2423from functools import lru_cache
25- from typing import TYPE_CHECKING , Any
24+ from typing import TYPE_CHECKING , ClassVar
2625
2726import numpy as np
28- import platformdirs
2927import requests
3028
3129import blosc2
3230from blosc2 import blosc2_ext
3331
34- if not blosc2 .IS_WASM :
35- import cpuinfo
36-
3732if TYPE_CHECKING :
3833 from collections .abc import Callable
3934
4035 import tensorflow
4136 import torch
4237
4338
44- _USER_CACHE_DIR : pathlib .Path = platformdirs .user_cache_path (
45- appname = "python-blosc2" ,
46- appauthor = "blosc" ,
47- )
48-
49-
5039def _check_typesize (typesize ):
5140 if not 1 <= typesize <= blosc2_ext .MAX_TYPESIZE :
5241 raise ValueError (f"typesize can only be in the 1-{ blosc2_ext .MAX_TYPESIZE } range." )
@@ -1151,11 +1140,12 @@ def print_versions():
11511140 print ("-=" * 38 )
11521141
11531142
1154- def apple_silicon_cache_size (cache_level : int ) -> int :
1143+ def apple_silicon_cache_size (cache_level : int ) -> int | None :
11551144 """Get the data cache_level size in bytes for Apple Silicon in MacOS.
11561145
11571146 Apple Silicon has two clusters, Performance (0) and Efficiency (1).
11581147 This function returns the data cache size for the Performance cluster.
1148+ Returns None if the cache size cannot be determined.
11591149 """
11601150 libc = ctypes .CDLL (ctypes .util .find_library ("c" ))
11611151 size = ctypes .c_size_t ()
@@ -1166,7 +1156,87 @@ def apple_silicon_cache_size(cache_level: int) -> int:
11661156 hwcachesize = f"hw.perflevel0.l{ cache_level } cachesize"
11671157 hwcachesize = hwcachesize .encode ("ascii" )
11681158 libc .sysctlbyname (hwcachesize , ctypes .byref (size ), ctypes .byref (ctypes .c_size_t (8 )), None , 0 )
1169- return size .value
1159+ return size .value if size .value > 0 else None
1160+
1161+
def windows_cache_size(cache_level: int) -> int | None:
    """Get the data cache size in bytes for Windows.

    Semantics:
    - L1: data cache only
    - L2/L3: unified cache (data + instruction), as no split exists

    The information is obtained from ``GetLogicalProcessorInformationEx`` in
    ``kernel32``; the first matching cache record with a non-zero size wins.

    Args:
        cache_level: Cache level to query (1, 2 or 3).

    Returns:
        The cache size in bytes, or None if the cache size cannot be
        determined (unsupported level, API unavailable, API call failure,
        malformed data or no matching record).
    """
    if cache_level not in (1, 2, 3):
        return None

    # ctypes only exposes WinDLL on Windows; honour the "None if unknown" contract.
    win_dll = getattr(ctypes, "WinDLL", None)
    if win_dll is None:
        return None

    from ctypes import wintypes

    # LOGICAL_PROCESSOR_RELATIONSHIP value selecting cache records
    RelationCache = 2

    # PROCESSOR_CACHE_TYPE enum values
    CacheUnified = 0
    CacheData = 2

    # Header structure to read Relationship and Size first
    class PROCESSOR_INFO_HEADER(ctypes.Structure):
        _fields_: ClassVar[list] = [
            ("Relationship", ctypes.c_int),
            ("Size", ctypes.c_uint),
        ]

    # Only the fields we need from CACHE_RELATIONSHIP (first 12 bytes)
    class CACHE_RELATIONSHIP(ctypes.Structure):
        _fields_: ClassVar[list] = [
            ("Level", ctypes.c_ubyte),
            ("Associativity", ctypes.c_ubyte),
            ("LineSize", ctypes.c_ushort),
            ("CacheSize", ctypes.c_uint),
            ("Type", ctypes.c_uint),
        ]

    try:
        kernel32 = win_dll("kernel32", use_last_error=True)
        get_info = kernel32.GetLogicalProcessorInformationEx
    except (OSError, AttributeError):
        # Library cannot be loaded or the entry point does not exist.
        return None

    size = wintypes.DWORD(0)

    # Query buffer size: this call is expected to fail with a NULL buffer,
    # but it stores the required length in `size`.
    get_info(RelationCache, None, ctypes.byref(size))
    if size.value == 0:
        return None

    buffer = ctypes.create_string_buffer(size.value)

    # Retrieve cache info; a zero return means the buffer was not filled,
    # so there is nothing meaningful to parse.
    if not get_info(RelationCache, buffer, ctypes.byref(size)):
        return None

    header_size = ctypes.sizeof(PROCESSOR_INFO_HEADER)
    cache_record_size = header_size + ctypes.sizeof(CACHE_RELATIONSHIP)
    total = min(size.value, len(buffer))
    wanted_type = CacheData if cache_level == 1 else CacheUnified

    offset = 0
    while offset + header_size <= total:
        # Read header to get Size for advancing offset (no slicing copy needed)
        header = PROCESSOR_INFO_HEADER.from_buffer_copy(buffer, offset)

        if header.Size < header_size:
            # Malformed record: advancing by it would loop forever.
            break

        if header.Relationship == RelationCache and offset + cache_record_size <= total:
            # Read cache info starting after the header
            cache = CACHE_RELATIONSHIP.from_buffer_copy(buffer, offset + header_size)

            if cache.Level == cache_level and cache.Type == wanted_type and cache.CacheSize > 0:
                return cache.CacheSize

        offset += header.Size

    return None
11701240
11711241
11721242def get_cache_info (cache_level : int ) -> tuple :
@@ -1197,19 +1267,21 @@ def get_cache_info(cache_level: int) -> tuple:
11971267 raise ValueError (f"L{ cache_level } cache not found in lscpu output" )
11981268
11991269
1200- def linux_cache_size (cache_level : int , default_size : int ) -> int :
1201- """Get the data cache_level size in bytes for Linux."""
1202- cache_size = default_size
1270+ def linux_cache_size (cache_level : int ) -> int | None :
1271+ """Get the data cache_level size in bytes for Linux.
1272+
1273+ Returns None if the cache size cannot be determined.
1274+ """
12031275 try :
12041276 # Try to read the cache size from sysfs
12051277 with open (f"/sys/devices/system/cpu/cpu0/cache/index{ cache_level } /size" ) as f :
12061278 size = f .read ()
12071279 if size .endswith ("K\n " ):
1208- cache_size = int (size [:- 2 ]) * 2 ** 10
1280+ return int (size [:- 2 ]) * 2 ** 10
12091281 elif size .endswith ("M\n " ):
1210- cache_size = int (size [:- 2 ]) * 2 ** 20
1282+ return int (size [:- 2 ]) * 2 ** 20
12111283 elif size .endswith ("G\n " ):
1212- cache_size = int (size [:- 2 ]) * 2 ** 30
1284+ return int (size [:- 2 ]) * 2 ** 30
12131285 except FileNotFoundError :
12141286 # Try with lscpu, if available.
12151287 try :
@@ -1219,70 +1291,69 @@ def linux_cache_size(cache_level: int, default_size: int) -> int:
12191291 # In general, dividing the cache size by the number of instances would bring
12201292 # best performance for private caches (L1 and L2). For shared caches (L3),
12211293 # this should be the case as well, but more experimentation is needed.
1222- cache_size //= cache_instances
1223- return cache_size
1294+ return cache_size // cache_instances
12241295 except (FileNotFoundError , ValueError ):
1225- # If lscpu is not available or the cache size cannot be read from sysfs,
1226- # return the default size.
12271296 pass
1228- return cache_size
1297+ return None
12291298
12301299
1231- def _get_cpu_info ():
1232- if blosc2 .IS_WASM :
1233- # Emscripten/wasm32 does not have access to CPU information.
1234- # Populate it with some reasonable defaults.
1235- return {
1236- "brand" : "Emscripten" ,
1237- "arch" : "wasm32" ,
1238- "count" : 1 ,
1239- "l1_data_cache_size" : 32 * 1024 ,
1240- "l2_cache_size" : 256 * 1024 ,
1241- "l3_cache_size" : 1024 * 1024 ,
1242- }
1243- cpu_info = cpuinfo .get_cpu_info ()
1244- # cpuinfo does not correctly retrieve the cache sizes for Apple Silicon, so do it manually
1245- if platform .system () == "Darwin" :
1246- cpu_info ["l1_data_cache_size" ] = apple_silicon_cache_size (1 )
1247- cpu_info ["l2_cache_size" ] = apple_silicon_cache_size (2 )
1248- cpu_info ["l3_cache_size" ] = apple_silicon_cache_size (3 )
1249- # cpuinfo does not correctly retrieve the cache sizes for all CPUs on Linux, so ask the kernel
1250- if platform .system () == "Linux" :
1251- l1_data_cache_size = cpu_info .get ("l1_data_cache_size" , 32 * 1024 )
1252- # Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache
1253- cpu_info ["l1_data_cache_size" ] = linux_cache_size (0 , l1_data_cache_size )
1254- l2_cache_size = cpu_info .get ("l2_cache_size" , 256 * 1024 )
1255- cpu_info ["l2_cache_size" ] = linux_cache_size (2 , l2_cache_size )
1256- l3_cache_size = cpu_info .get ("l3_cache_size" , 1024 * 1024 )
1257- cpu_info ["l3_cache_size" ] = linux_cache_size (3 , l3_cache_size )
1258- return cpu_info
1259-
1300+ def _available_cpus () -> int :
1301+ try :
1302+ # On Linux, this returns the number of CPUs available to the process,
1303+ # which may be less than os.cpu_count() due to CPU affinity settings.
1304+ return len (os .sched_getaffinity (0 ))
1305+ except AttributeError :
1306+ # os.sched_getaffinity is not available on all platforms
1307+ return os .cpu_count () or 1
12601308
1261- def write_cached_cpu_info (cpu_info_dict : dict [str , Any ]) -> None :
1262- _USER_CACHE_DIR .mkdir (parents = True , exist_ok = True )
1263- with (_USER_CACHE_DIR / "cpuinfo.json" ).open ("w" ) as f :
1264- json .dump (cpu_info_dict , f , indent = 4 )
12651309
1310+ def _update_cache_sizes (
1311+ cpu_info : dict , cache_size_func : Callable [[int ], int | None ], levels : tuple [int , int , int ]
1312+ ) -> None :
1313+ """Update cpu_info with cache sizes from the given function.
12661314
1267- def read_cached_cpu_info () -> dict [str , Any ]:
1268- try :
1269- with (_USER_CACHE_DIR / "cpuinfo.json" ).open () as f :
1270- return json .load (f )
1271- except (FileNotFoundError , json .JSONDecodeError ):
1272- return {}
1315+ Args:
1316+ cpu_info: Dictionary to update with cache sizes.
1317+ cache_size_func: Function that takes a cache level and returns size or None.
1318+ levels: Tuple of (l1_level, l2_level, l3_level) to pass to cache_size_func.
1319+ """
1320+ l1_level , l2_level , l3_level = levels
1321+ if (l1_data_cache_size := cache_size_func (l1_level )) is not None :
1322+ cpu_info ["l1_data_cache_size" ] = l1_data_cache_size
1323+ if (l2_cache_size := cache_size_func (l2_level )) is not None :
1324+ cpu_info ["l2_cache_size" ] = l2_cache_size
1325+ if (l3_cache_size := cache_size_func (l3_level )) is not None :
1326+ cpu_info ["l3_cache_size" ] = l3_cache_size
12731327
12741328
@lru_cache(maxsize=1)
def get_cpu_info():
    """
    Build a dictionary shaped like the result of cpuinfo.get_cpu_info().

    cpuinfo.get_cpu_info() itself is deliberately not used, since that
    function takes 1s to run and this function is run at import time.

    The dictionary starts from default cache sizes and the available CPU
    count; on Darwin, Linux and Windows the cache sizes are then replaced by
    the values the platform-specific probe can determine.
    """
    info = {
        "count": _available_cpus(),
        "l1_data_cache_size": 32 * 1024,
        "l2_cache_size": 256 * 1024,
        "l3_cache_size": 1024 * 1024,
    }

    # Emscripten/wasm32 does not have access to CPU information.
    # Return defaults.
    if blosc2.IS_WASM:
        return info

    # Per-platform probe and the (L1, L2, L3) level identifiers it expects.
    probes = {
        "Darwin": (apple_silicon_cache_size, (1, 2, 3)),
        # Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache
        "Linux": (linux_cache_size, (0, 2, 3)),
        "Windows": (windows_cache_size, (1, 2, 3)),
    }
    selected = probes.get(platform.system())
    if selected is not None:
        size_func, levels = selected
        _update_cache_sizes(info, size_func, levels)

    return info
12861357
12871358
12881359def get_blocksize () -> int :
0 commit comments