@staticmethod
def is_simple_type(t):
    """
    Tell whether the type ``t`` is one of ``NumpyHelper.SIMPLE_TYPES``.

    Per the original note, these are the string/number types whose values can be
    stored in a numpy array without resorting to pickle when serialized.

    Parameters
    ----------
    t
        The type to look up (a type object, not an instance).

    Returns
    -------
    bool
        True if ``t`` is a member of ``NumpyHelper.SIMPLE_TYPES``, False otherwise.
    """
    simple_types = NumpyHelper.SIMPLE_TYPES
    return t in simple_types
@staticmethod
def get_numpy_representation(obj):
    """
    Convert ``obj`` to a representation that can be stored in a numpy array without pickle.

    Conversion rules, applied in order:

    - ``None`` becomes the empty string ``''``;
    - a value whose exact type is in ``NumpyHelper.SIMPLE_TYPES`` is returned unchanged
      (the check uses ``type(obj)``, so subclasses of simple types do not match);
    - an ``Enum`` member is replaced by its ``name``;
    - a dict or list is serialized to a JSON string;
    - any other object is serialized to a JSON string built from its attribute
      dictionary (``vars(obj)``); objects that have no ``__dict__`` (e.g. tuples, sets,
      slotted objects) are passed to the JSON encoder directly instead of failing.

    During JSON encoding, values the encoder cannot handle natively are written as the
    member name for enums and as ``str(value)`` for everything else.

    Parameters
    ----------
    obj
        The value to convert.

    Returns
    -------
    object
        ``''``, the original simple value, an enum member name, or a JSON string.
    """
    if obj is None:
        return ''
    if type(obj) in NumpyHelper.SIMPLE_TYPES:
        return obj
    if isinstance(obj, Enum):
        return obj.name

    def encode_fallback(value):
        # Called by json.dumps only for values it cannot serialize natively.
        return value.name if isinstance(value, Enum) else str(value)

    # vars() raises TypeError for objects without __dict__, so only use it when the
    # attribute dictionary actually exists; otherwise let the JSON encoder (and the
    # fallback above) deal with the object itself.
    if isinstance(obj, (dict, list)) or not hasattr(obj, '__dict__'):
        representation = obj
    else:
        representation = vars(obj)

    return json.dumps(representation, default=encode_fallback)
@staticmethod
def create_memmap_array_in_cache(shape: tuple, data: np.ndarray = None) -> np.ndarray:
    """
    Create a float32 memory-mapped array backed by a new file in the cache directory.

    The backing file is named ``temp_<uuid4>.npy`` and placed in the ``memmap_storage``
    folder under ``EnvironmentSettings.get_cache_path()``. Despite the ``.npy`` suffix,
    the file holds raw float32 values (written with ``ndarray.tofile``), not the
    ``.npy`` format with a header.

    Parameters
    ----------
    shape : tuple
        Shape of the memory-mapped array to return.
    data : np.ndarray, optional
        If given, it is cast to float32 and written to the backing file before the file
        is mapped, so the returned array starts out with these values. If omitted, a
        new file is created for the requested shape.

    Returns
    -------
    np.ndarray
        A ``np.memmap`` of dtype float32 and the requested shape.
    """
    import uuid

    # NOTE(review): PathBuilder.build presumably creates the folder if missing - confirm.
    storage_dir = PathBuilder.build(EnvironmentSettings.get_cache_path() / "memmap_storage")
    memmap_path = storage_dir / f"temp_{uuid.uuid4()}.npy"

    if data is None:
        # Nothing to copy: 'w+' creates (or overwrites) the backing file.
        return np.memmap(memmap_path, dtype='float32', mode='w+', shape=shape)

    # Dump the values first, then map the existing file read/write.
    data.astype('float32').tofile(memmap_path)
    return np.memmap(memmap_path, dtype='float32', mode='r+', shape=shape)
@staticmethod
def concat_arrays_rowwise(arrays: list, force='auto', dense_max_mb=100, use_memmap=False):
    """
    Concatenate 2D numpy arrays or sparse matrices with ``hstack``.

    The inputs are stacked horizontally: every input contributes additional columns
    to the same set of rows, so all inputs must have the same number of rows.

    If at least one input is a scipy sparse matrix, all inputs are converted to
    float32 CSR matrices and stacked as sparse; otherwise ``np.hstack`` is used and
    the dtype follows numpy's usual rules.

    Parameters
    ----------
    arrays : list of np.ndarray or scipy.sparse matrices
        The arrays to concatenate; must not be empty.
    force : {"auto", "dense", "sparse"}
        - "auto": use memory-based heuristic (default)
        - "dense": always return numpy.ndarray
        - "sparse": always return scipy.sparse.csr_matrix
    dense_max_mb : int
        Threshold (estimated dense size in MB) up to which a sparse result is
        converted to a dense array in "auto" mode.
    use_memmap : bool
        If True and the result is dense, the result is copied into a float32
        memory-mapped array in the cache. Sparse results cannot be memory-mapped
        and are returned as they are.

    Returns
    -------
    np.ndarray or scipy.sparse.csr_matrix
        The concatenated array.

    Raises
    ------
    ValueError
        If ``arrays`` is empty.
    RuntimeError
        If the concatenated array contains NaN values.
    """
    if not arrays:
        raise ValueError("No matrices provided")

    from scipy import sparse

    if any(sparse.issparse(array) for array in arrays):
        # Convert all to sparse (CSR for efficiency)
        matrices = [array.astype(np.float32) if sparse.issparse(array)
                    else sparse.csr_matrix(array).astype(np.float32)
                    for array in arrays]
        result = sparse.hstack(matrices, format="csr")

        # Estimate dense memory size
        size_in_mb = result.shape[0] * result.shape[1] * result.dtype.itemsize / (1024 * 1024)

        # Skip the heuristic when a sparse result is requested anyway, to avoid a
        # pointless dense round trip.
        if force != "sparse" and size_in_mb <= dense_max_mb:
            result = result.toarray()
    else:
        # All are numpy arrays
        result = np.hstack(arrays)

    # np.isnan does not work on sparse matrices; NaN can only occur among the
    # explicitly stored values, so inspecting .data is sufficient there.
    stored_values = result.data if sparse.issparse(result) else result
    if np.isnan(stored_values).any():
        import inspect
        logging.error(f"NumpyHelper: NaN values found in concatenated array; called from {inspect.stack()[1].function}")
        raise RuntimeError('NumpyHelper: NaN values found in concatenated array')

    if force == "dense" and sparse.issparse(result):
        result = result.toarray()
    elif force == "sparse" and not sparse.issparse(result):
        result = sparse.csr_matrix(result)

    if use_memmap:
        if sparse.issparse(result):
            # The memmap helper writes the data with ndarray.tofile, which sparse
            # matrices do not provide.
            logging.warning("NumpyHelper: use_memmap is ignored because the concatenated array is sparse")
        else:
            result = NumpyHelper.create_memmap_array_in_cache(result.shape, result)

    return result