7878 "base64" ,
7979)
8080
81- _allownumpy = ("_ArraySize_" , "_ArrayData_" , "_ArrayZipSize_" , "_ArrayZipData_" )
# JData annotation keys collected in `_allownumpy`. This revision extends the
# original four entries with "_ArrayIsSparse_" and "_ArrayIsComplex_".
# NOTE(review): how this tuple is consumed is not visible in this excerpt;
# confirm against its users before relying on the name alone.
81+ _allownumpy = (
82+ "_ArraySize_" ,
83+ "_ArrayData_" ,
84+ "_ArrayZipSize_" ,
85+ "_ArrayZipData_" ,
86+ "_ArrayIsSparse_" ,
87+ "_ArrayIsComplex_" ,
88+ )
8289
8390##====================================================================================
8491## Python to JData encoding function
8592##====================================================================================
8693
8794
def _compress_data(rawbytes, opt):
    """Compress raw bytes using the codec named by ``opt["compression"]``.

    Args:
        rawbytes: The byte string to compress.
        opt: Options dict. ``"compression"`` selects the codec
            (``"zlib"``, ``"gzip"``, ``"lzma"``, ``"lz4"``, one of the
            ``"blosc2*"`` names, or ``"base64"``). ``"nthread"`` is read only
            for blosc2 codecs and defaults to 1.

    Returns:
        The compressed bytes. ``rawbytes`` is returned unchanged when the
        codec is missing, ``None``/empty, ``"base64"``, or not recognized.

    Raises:
        ImportError: If ``lz4`` or ``blosc2`` is requested but not installed.
        KeyError: If the codec starts with ``"blosc2"`` but is not one of the
            supported blosc2 codec names.
    """
    # A missing or empty codec is treated like any other unrecognized codec
    # (pass-through) instead of failing with KeyError / AttributeError.
    codec = opt.get("compression")
    if not codec or codec == "base64":
        # "base64" applies no compression in this function.
        return rawbytes

    if codec == "zlib":
        return zlib.compress(rawbytes)
    if codec == "gzip":
        # wbits = MAX_WBITS | 16 makes zlib emit a gzip header and trailer.
        gzipper = zlib.compressobj(wbits=(zlib.MAX_WBITS | 16))
        return gzipper.compress(rawbytes) + gzipper.flush()
    if codec == "lzma":
        return lzma.compress(rawbytes, lzma.FORMAT_ALONE)
    if codec == "lz4":
        # Optional dependency: imported only when this codec is requested.
        import lz4.frame

        return lz4.frame.compress(rawbytes)
    if codec.startswith("blosc2"):
        # Optional dependency: imported only when a blosc2 codec is requested.
        import blosc2

        BLOSC2CODEC = {
            "blosc2blosclz": blosc2.Codec.BLOSCLZ,
            "blosc2lz4": blosc2.Codec.LZ4,
            "blosc2lz4hc": blosc2.Codec.LZ4HC,
            "blosc2zlib": blosc2.Codec.ZLIB,
            "blosc2zstd": blosc2.Codec.ZSTD,
        }
        nthread = opt.get("nthread", 1)
        return blosc2.compress2(rawbytes, codec=BLOSC2CODEC[codec], nthreads=nthread)

    # Unrecognized codec: hand the data back untouched.
    return rawbytes
126+
127+
def _issparse(d):
    """Return whether ``d`` is a scipy sparse matrix, without importing scipy.

    An object can only be a scipy sparse matrix if ``scipy.sparse`` has
    already been imported by whoever created it. Looking the module up in
    ``sys.modules`` therefore gives the same answer as importing it, while
    sparing callers that never touch sparse data the scipy import cost.

    Args:
        d: Any Python object.

    Returns:
        The result of ``scipy.sparse.issparse(d)`` when ``scipy.sparse`` is
        loaded, otherwise ``False``.
    """
    import sys

    sparse_mod = sys.modules.get("scipy.sparse")
    if sparse_mod is None:
        # scipy.sparse was never loaded (or is unavailable), so d cannot be sparse.
        return False
    return sparse_mod.issparse(d)
136+
137+
88138def encode (d , opt = None , ** kwargs ):
89139 """
90140 Encode a Python data structure to portable JData-annotated dict constructs.
@@ -125,7 +175,7 @@ def encode(d, opt=None, **kwargs):
125175 if opt is None :
126176 opt = {}
127177 kwargs .setdefault ("compression" , "zlib" )
128- kwargs .setdefault ("compressarraysize" , 200 )
178+ kwargs .setdefault ("compressarraysize" , 300 )
129179 opt .setdefault ("inplace" , False )
130180 opt .update (kwargs )
131181
@@ -178,6 +228,46 @@ def encode(d, opt=None, **kwargs):
178228 "_ArrayData_" : [d .real , d .imag ],
179229 }
180230 return newobj
231+ elif _issparse (d ):
232+ import scipy .sparse
233+
234+ coo = d .tocoo ()
235+ newobj = {}
236+ val_dtype = coo .data .dtype if len (coo .data ) > 0 else np .float64
237+ if np .issubdtype (val_dtype , np .complexfloating ):
238+ real_dtype = val_dtype .type (0 ).real .dtype
239+ else :
240+ real_dtype = val_dtype
241+ newobj ["_ArrayType_" ] = jdtype .get (str (real_dtype ), str (real_dtype ))
242+ newobj ["_ArraySize_" ] = list (d .shape )
243+ newobj ["_ArrayIsSparse_" ] = True
244+ if np .issubdtype (val_dtype , np .complexfloating ):
245+ newobj ["_ArrayIsComplex_" ] = True
246+ newobj ["_ArrayData_" ] = [
247+ (coo .row + 1 ).astype (np .float64 ).tolist (),
248+ (coo .col + 1 ).astype (np .float64 ).tolist (),
249+ coo .data .real .astype (np .float64 ).tolist (),
250+ coo .data .imag .astype (np .float64 ).tolist (),
251+ ]
252+ else :
253+ newobj ["_ArrayData_" ] = [
254+ (coo .row + 1 ).astype (np .float64 ).tolist (),
255+ (coo .col + 1 ).astype (np .float64 ).tolist (),
256+ coo .data .astype (np .float64 ).tolist (),
257+ ]
258+ if "compression" in opt and opt ["compression" ] in _zipper :
259+ arraydata = np .array (newobj ["_ArrayData_" ])
260+ nrows = arraydata .shape [0 ]
261+ nnz = arraydata .shape [1 ] if arraydata .ndim > 1 else 0
262+ if nnz >= opt .get ("compressarraysize" , 300 ):
263+ rawbytes = arraydata .astype (np .float64 ).tobytes ()
264+ newobj ["_ArrayZipType_" ] = opt ["compression" ]
265+ newobj ["_ArrayZipSize_" ] = [nrows , nnz ]
266+ newobj ["_ArrayZipData_" ] = _compress_data (rawbytes , opt )
267+ if (("base64" in opt ) and (opt ["base64" ])) or opt ["compression" ] == "base64" :
268+ newobj ["_ArrayZipData_" ] = base64 .b64encode (newobj ["_ArrayZipData_" ])
269+ newobj .pop ("_ArrayData_" )
270+ return newobj
181271 elif isinstance (d , np .ndarray ) or np .iscomplex (d ):
182272 newobj = {}
183273 newobj ["_ArrayType_" ] = jdtype [str (d .dtype )] if (str (d .dtype ) in jdtype ) else str (d .dtype )
@@ -196,7 +286,7 @@ def encode(d, opt=None, **kwargs):
196286 else :
197287 newobj ["_ArrayData_" ] = d .ravel ()
198288
199- if "compression" in opt :
289+ if "compression" in opt and d . size >= opt . get ( "compressarraysize" , 300 ) :
200290 if opt ["compression" ] not in _zipper :
201291 raise Exception (
202292 "JData" ,
@@ -310,6 +400,65 @@ def decode(d, opt=None, **kwargs):
310400 return decodelist (list (d ), ** opt )
311401 elif isinstance (d , dict ):
312402 if "_ArrayType_" in d :
403+ # Early intercept for sparse arrays
404+ if "_ArrayIsSparse_" in d and d ["_ArrayIsSparse_" ]:
405+ try :
406+ import scipy .sparse
407+ except ImportError :
408+ raise ImportError ('To decode sparse JData, install scipy: "pip install scipy"' )
409+ shape = (
410+ tuple (d ["_ArraySize_" ])
411+ if isinstance (d ["_ArraySize_" ], list )
412+ else (d ["_ArraySize_" ],)
413+ )
414+ is_complex = "_ArrayIsComplex_" in d and d ["_ArrayIsComplex_" ]
415+ if "_ArrayZipData_" in d :
416+ # Decompress first
417+ newobj = d ["_ArrayZipData_" ]
418+ if isinstance (newobj , str ):
419+ newobj = newobj .encode ("ascii" )
420+ if ("base64" in opt and opt ["base64" ]) or (
421+ "_ArrayZipType_" in d and d ["_ArrayZipType_" ] == "base64"
422+ ):
423+ newobj = base64 .b64decode (newobj )
424+ if "_ArrayZipType_" in d and d ["_ArrayZipType_" ] != "base64" :
425+ if d ["_ArrayZipType_" ] == "zlib" :
426+ newobj = zlib .decompress (newobj )
427+ elif d ["_ArrayZipType_" ] == "gzip" :
428+ newobj = zlib .decompress (newobj , zlib .MAX_WBITS | 16 )
429+ elif d ["_ArrayZipType_" ] == "lzma" :
430+ buf = bytearray (newobj )
431+ if len (buf ) > 13 :
432+ buf [5 :13 ] = b"\xff \xff \xff \xff \xff \xff \xff \xff "
433+ newobj = lzma .decompress (buf , lzma .FORMAT_ALONE )
434+ elif d ["_ArrayZipType_" ] == "lz4" :
435+ import lz4 .frame
436+
437+ newobj = lz4 .frame .decompress (bytes (newobj ))
438+ elif d ["_ArrayZipType_" ].startswith ("blosc2" ):
439+ import blosc2
440+
441+ nthread = opt .get ("nthread" , 1 )
442+ newobj = blosc2 .decompress2 (
443+ bytes (newobj ), as_bytearray = False , nthreads = nthread
444+ )
445+ arraydata = np .frombuffer (bytearray (newobj ), dtype = np .float64 ).reshape (
446+ d ["_ArrayZipSize_" ]
447+ )
448+ else :
449+ arraydata = np .array (d ["_ArrayData_" ], dtype = np .float64 )
450+ if arraydata .ndim == 1 :
451+ nrows = 4 if is_complex else 3
452+ nnz = len (arraydata ) // nrows
453+ arraydata = arraydata .reshape (nrows , nnz )
454+ rows = arraydata [0 ].astype (np .intp ) - 1
455+ cols = arraydata [1 ].astype (np .intp ) - 1
456+ if is_complex :
457+ vals = arraydata [2 ] + 1j * arraydata [3 ]
458+ else :
459+ vals = arraydata [2 ]
460+ return scipy .sparse .csc_matrix ((vals , (rows , cols )), shape = shape )
461+
313462 if isinstance (d ["_ArraySize_" ], str ):
314463 d ["_ArraySize_" ] = np .frombuffer (bytearray (d ["_ArraySize_" ]))
315464 if "_ArrayZipData_" in d :
@@ -365,6 +514,35 @@ def decode(d, opt=None, **kwargs):
365514 newobj = np .frombuffer (bytearray (newobj ), dtype = np .dtype (d ["_ArrayType_" ])).reshape (
366515 d ["_ArrayZipSize_" ]
367516 )
517+ # Handle sparse arrays
518+ if "_ArrayIsSparse_" in d and d ["_ArrayIsSparse_" ]:
519+ try :
520+ import scipy .sparse
521+ except ImportError :
522+ raise ImportError (
523+ 'To decode sparse JData arrays, install scipy: "pip install scipy"'
524+ )
525+ shape = (
526+ tuple (d ["_ArraySize_" ])
527+ if isinstance (d ["_ArraySize_" ], list )
528+ else (d ["_ArraySize_" ],)
529+ )
530+ is_complex = "_ArrayIsComplex_" in d and d ["_ArrayIsComplex_" ]
531+ if isinstance (newobj , np .ndarray ):
532+ arraydata = newobj
533+ else :
534+ arraydata = np .array (newobj , dtype = np .float64 )
535+ if arraydata .ndim == 1 :
536+ nrows = 4 if is_complex else 3
537+ nnz = len (arraydata ) // nrows
538+ arraydata = arraydata .reshape (nrows , nnz )
539+ rows = arraydata [0 ].astype (np .intp ) - 1
540+ cols = arraydata [1 ].astype (np .intp ) - 1
541+ if is_complex :
542+ vals = arraydata [2 ] + 1j * arraydata [3 ]
543+ else :
544+ vals = arraydata [2 ]
545+ newobj = scipy .sparse .csc_matrix ((vals , (rows , cols )), shape = shape )
368546 if "_ArrayIsComplex_" in d and newobj .shape [0 ] == 2 :
369547 newobj = newobj [0 ] + 1j * newobj [1 ]
370548 if "_ArrayOrder_" in d and (
@@ -377,6 +555,7 @@ def decode(d, opt=None, **kwargs):
377555 newobj = newobj .reshape (d ["_ArraySize_" ])
378556 if not hasattr (d ["_ArraySize_" ], "__iter__" ) and d ["_ArraySize_" ] == 1 :
379557 newobj = newobj .item ()
558+ return newobj
380559 return newobj
381560 elif "_ArrayData_" in d :
382561 if isinstance (d ["_ArrayData_" ], str ):
0 commit comments