# Authors: Travis Oliphant, Matthew Brett
"""
Base classes for MATLAB file stream reading.

MATLAB is a registered trademark of the Mathworks inc.
"""
else: byteord = ord
{'file_arg': '''file_name : str Name of the mat file (do not need .mat extension if appendmat==True) Can also pass open file-like object.''', 'append_arg': '''appendmat : bool, optional True to append the .mat extension to the end of the given filename, if not already present.''', 'load_args': '''byte_order : str or None, optional None by default, implying byte order guessed from mat file. Otherwise can be one of ('native', '=', 'little', '<', 'BIG', '>'). mat_dtype : bool, optional If True, return arrays in same dtype as would be loaded into MATLAB (instead of the dtype with which they are saved). squeeze_me : bool, optional Whether to squeeze unit matrix dimensions or not. chars_as_strings : bool, optional Whether to convert char arrays to string arrays. matlab_compatible : bool, optional Returns matrices as would be loaded by MATLAB (implies squeeze_me=False, chars_as_strings=False, mat_dtype=True, struct_as_record=True).''', 'struct_arg': '''struct_as_record : bool, optional Whether to load MATLAB structs as numpy record arrays, or as old-style numpy arrays with dtype=object. Setting this flag to False replicates the behavior of scipy version 0.7.x (returning numpy object arrays). The default setting is True, because it allows easier round-trip load and save of MATLAB files.''', 'matstream_arg': '''mat_stream : file-like Object with file API, open for reading.''', 'long_fields': '''long_field_names : bool, optional * False - maximum field name length in a structure is 31 characters which is the documented maximum length. This is the default. * True - maximum field name length in a structure is 63 characters which works for MATLAB 7.6''', 'do_compression': '''do_compression : bool, optional Whether to compress matrices on write. Default is False.''', 'oned_as': '''oned_as : {'row', 'column'}, optional If 'column', write 1-D numpy arrays as column vectors. 
If 'row', write 1D numpy arrays as row vectors.''', 'unicode_strings': '''unicode_strings : bool, optional If True, write strings as Unicode, else MATLAB usual encoding.'''}
'''
Note on architecture
====================
There are three sets of parameters relevant for reading files. The first are *file read parameters* - containing options that are common for reading the whole file, and therefore every variable within that file. At the moment these are:
* mat_stream
* dtypes (derived from byte code)
* byte_order
* chars_as_strings
* squeeze_me
* struct_as_record (MATLAB 5 files)
* class_dtypes (derived from order code, MATLAB 5 files)
* codecs (MATLAB 5 files)
* uint16_codec (MATLAB 5 files)
Another set of parameters are those that apply only to the current variable being read - the *header*:
* header related variables (different for v4 and v5 mat files)
* is_complex
* mclass
* var_stream
With the header, we need ``next_position`` to tell us where the next variable in the stream is.
Then, for each element in a matrix, there can be *element read parameters*. An element is, for example, one element in a MATLAB cell array. At the moment these are:
* mat_dtype
The file-reading object contains the *file read parameters*. The *header* is
passed around as a data object, or may be read and discarded in a single
function. The *element read parameters* - the mat_dtype in this instance -
are passed into a general post-processing function - see ``mio_utils`` for
details.
'''
''' Convert dtypes in mapping to given order
Parameters ---------- dtype_template : mapping mapping with values returning numpy dtype from ``np.dtype(val)`` order_code : str an order code suitable for using in ``dtype.newbyteorder()``
Returns ------- dtypes : mapping mapping where values have been replaced by ``np.dtype(val).newbyteorder(order_code)``
'''
def read_dtype(mat_stream, a_dtype):
    """ Read one item of a known dtype from the byte stream

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream, positioned at the item to read.
    a_dtype : dtype
        dtype of the item to read. It is used as given, so it must already
        carry the correct endianness.

    Returns
    -------
    arr : ndarray
        0-d array of dtype `a_dtype` wrapping the ``a_dtype.itemsize``
        bytes consumed from the stream.
    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
def get_matfile_version(fileobj):
    """ Return major, minor tuple depending on apparent mat file type

    Where:

     #. 0,x -> version 4 format mat files
     #. 1,x -> version 5 format mat files
     #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    fileobj : file_like
        object implementing seek() and read()

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty.
    ValueError
        The matfile version is unknown, or the file is too short to contain
        the bytes the version is read from.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    fileobj.seek(0)
    mopt_bytes = fileobj.read(4)
    if len(mopt_bytes) == 0:
        raise MatReadError("Mat file appears to be empty")
    if len(mopt_bytes) < 4:
        # Too short to classify; fail with the documented exception rather
        # than an incidental buffer-size error further down.
        fileobj.seek(0)
        raise ValueError('Unknown mat file type, file has only %d bytes'
                         % len(mopt_bytes))
    # Mat4 files have a zero somewhere in first 4 bytes.  ``bytearray``
    # yields integers on both Python 2 and Python 3.
    if 0 in bytearray(mopt_bytes):
        fileobj.seek(0)
        return (0, 0)
    # For 5 format or 7.3 format we need to read an integer in the
    # header. Bytes 124 through 128 contain a version integer and an
    # endian test string
    fileobj.seek(124)
    tst_bytes = bytearray(fileobj.read(4))
    fileobj.seek(0)
    if len(tst_bytes) < 4:
        raise ValueError('Unknown mat file type, header is truncated')
    # The position of the 'I' in the endian test string tells us which of
    # the two version bytes is the major one.
    maj_ind = int(tst_bytes[2] == ord(b'I'))
    maj_val = tst_bytes[maj_ind]
    min_val = tst_bytes[1 - maj_ind]
    ret = (maj_val, min_val)
    if maj_val in (1, 2):
        return ret
    raise ValueError('Unknown mat file type, version %s, %s' % ret)
def matdims(arr, oned_as='column'):
    """ Work out the shape MATLAB would use for a given array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether a 1-D array is reported as a MATLAB column or row matrix.
        Default is 'column'.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Raises
    ------
    ValueError
        If `arr` is 1-D and non-empty and `oned_as` is neither 'column'
        nor 'row'.

    Notes
    -----
    A choice had to be made for the default shape of a 1-D array.
    ``np.atleast_2d`` treats it as a row vector, and a vector created in
    MATLAB (e.g. ``>> 1:12``) is a row vector as well.

    Versions of scipy up to and including 0.11 resulted (accidentally) in
    1-D arrays being read as column vectors. For the moment, we maintain
    the same tradition here.

    Examples
    --------
    >>> matdims(np.array(1)) # numpy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1d array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1d array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2d array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2d array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3d array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1d array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2d
    (0, 0)
    >>> matdims(np.array([[[]]])) # empty 3d
    (0, 0, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1d array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1D option "bizarre" is strange
    """
    dims = arr.shape
    rank = len(dims)
    if rank == 0:  # scalar
        return (1, 1)
    if 0 in dims:  # zero elements: the product of the dims is zero
        return (0,) * max(arr.ndim, 2)
    if rank != 1:  # 2-D and above pass through untouched
        return dims
    if oned_as == 'column':
        return dims + (1,)
    if oned_as == 'row':
        return (1,) + dims
    raise ValueError('1D option "%s" is strange' % oned_as)
''' Abstract class defining required interface for var readers''' pass
''' Returns header ''' pass
''' Reads array given header ''' pass
class MatFileReader(object):
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory   - gives object to fetch next matrix from stream
    guess_byte_order        - guesses file byte order from file
    """

    # NOTE(review): the ``%(load_args)s`` placeholder in the docstring below
    # is presumably expanded by a docstring-filling decorator that is not
    # visible in this part of the file -- confirm it is applied here.
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True
                 ):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
    %(load_args)s
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        self.dtypes = {}
        # A falsy byte_order (None, '') means "work it out from the file"
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        # matlab_compatible overrides the three individual flags below
        if matlab_compatible:
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if no more bytes can be read from the stream

        The stream position is left where it was before the call.
        '''
        b = self.mat_stream.read(1)
        if b:
            # Undo the probe read.  Only rewind when a byte was actually
            # consumed: at end of stream nothing was read, and stepping
            # back would move the position (or seek before the start of an
            # empty stream).
            self.mat_stream.seek(self.mat_stream.tell() - len(b))
        return len(b) == 0
def arr_dtype_number(arr, num):
    ''' Return dtype of same kind and byte order as `arr`, with `num` items

    The first two characters of ``arr.dtype.str`` (byte-order flag and type
    character) are kept and `num` is appended as the new item count.
    '''
    order_and_kind = arr.dtype.str[:2]
    return np.dtype('{}{}'.format(order_and_kind, num))
def arr_to_chars(arr):
    ''' Convert string array to char array

    Parameters
    ----------
    arr : ndarray
        Array of fixed-width strings. The width is read from the digits
        of ``arr.dtype.str``.

    Returns
    -------
    chars : ndarray
        Array with the shape of `arr` (``(1,)`` for a 0-d input) plus one
        trailing axis whose length is the string width, with one character
        per element. Elements that compare equal to the empty string are
        replaced by a single space. The result is a view onto the memory of
        `arr` when nothing needed replacing, and a fresh copy otherwise, so
        `arr` itself is never modified.
    '''
    dims = list(arr.shape)
    if not dims:
        dims = [1]
    dims.append(int(arr.dtype.str[2:]))
    # Same byte order and kind as the input, with an item width of one
    # character (equivalent to ``arr_dtype_number(arr, 1)``).
    char_dtype = np.dtype(arr.dtype.str[:2] + '1')
    chars = np.ndarray(shape=dims, dtype=char_dtype, buffer=arr)
    # Compare against an empty string of the array's own dtype, and keep the
    # result as a plain boolean mask: wrapping the mask in a list makes
    # NumPy treat it as an index array with an extra leading axis.
    empties = chars == np.array('', dtype=char_dtype)
    if not np.any(empties):
        return chars
    chars = chars.copy()
    chars[empties] = ' '
    return chars