1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

# Authors: Travis Oliphant, Matthew Brett 

 

""" 

Base classes for MATLAB file stream reading. 

 

MATLAB is a registered trademark of The MathWorks, Inc. 

""" 

from __future__ import division, print_function, absolute_import 

 

import sys 

import operator 

 

from scipy._lib.six import reduce 

 

import numpy as np 

 

# ``byteord`` turns one element of an indexed byte string into an integer.
# Indexing ``bytes`` on Python 3 already gives an int, so ``int`` suffices;
# on Python 2 indexing gives a 1-character str, which needs ``ord``.
byteord = int if sys.version_info[0] >= 3 else ord

 

from scipy.misc import doccer 

 

from . import byteordercodes as boc 

 

 

class MatReadError(Exception):
    """Exception raised for errors found while reading a mat file."""

 

 

class MatWriteError(Exception):
    """Exception type for errors while writing a mat file."""

 

 

class MatReadWarning(UserWarning):
    """Warning category for non-fatal problems while reading a mat file."""

 

 

# Reusable parameter descriptions, keyed by name.  The dictionary is passed
# to ``doccer.filldoc`` right after this definition, and docstrings pull an
# entry in with a ``%(key)s`` placeholder (for example ``%(load_args)s``).
doc_dict = \ 

{'file_arg': 

'''file_name : str 

Name of the mat file (do not need .mat extension if 

appendmat==True) Can also pass open file-like object.''', 

'append_arg': 

'''appendmat : bool, optional 

True to append the .mat extension to the end of the given 

filename, if not already present.''', 

'load_args': 

'''byte_order : str or None, optional 

None by default, implying byte order guessed from mat 

file. Otherwise can be one of ('native', '=', 'little', '<', 

'BIG', '>'). 

mat_dtype : bool, optional 

If True, return arrays in same dtype as would be loaded into 

MATLAB (instead of the dtype with which they are saved). 

squeeze_me : bool, optional 

Whether to squeeze unit matrix dimensions or not. 

chars_as_strings : bool, optional 

Whether to convert char arrays to string arrays. 

matlab_compatible : bool, optional 

Returns matrices as would be loaded by MATLAB (implies 

squeeze_me=False, chars_as_strings=False, mat_dtype=True, 

struct_as_record=True).''', 

'struct_arg': 

'''struct_as_record : bool, optional 

Whether to load MATLAB structs as numpy record arrays, or as 

old-style numpy arrays with dtype=object. Setting this flag to 

False replicates the behavior of scipy version 0.7.x (returning 

numpy object arrays). The default setting is True, because it 

allows easier round-trip load and save of MATLAB files.''', 

'matstream_arg': 

'''mat_stream : file-like 

Object with file API, open for reading.''', 

'long_fields': 

'''long_field_names : bool, optional 

* False - maximum field name length in a structure is 31 characters 

which is the documented maximum length. This is the default. 

* True - maximum field name length in a structure is 63 characters 

which works for MATLAB 7.6''', 

'do_compression': 

'''do_compression : bool, optional 

Whether to compress matrices on write. Default is False.''', 

'oned_as': 

'''oned_as : {'row', 'column'}, optional 

If 'column', write 1-D numpy arrays as column vectors. 

If 'row', write 1D numpy arrays as row vectors.''', 

'unicode_strings': 

'''unicode_strings : bool, optional 

If True, write strings as Unicode, else MATLAB usual encoding.'''} 

 

# Decorator produced by ``doccer.filldoc`` from ``doc_dict``; it is applied
# as ``@docfiller`` to ``MatFileReader.__init__``, whose docstring contains
# a ``%(load_args)s`` placeholder.
docfiller = doccer.filldoc(doc_dict) 

 

''' 

 

Note on architecture 

====================== 

 

There are three sets of parameters relevant for reading files. The 

first are *file read parameters* - containing options that are common 

for reading the whole file, and therefore every variable within that 

file. At the moment these are: 

 

* mat_stream 

* dtypes (derived from byte code) 

* byte_order 

* chars_as_strings 

* squeeze_me 

* struct_as_record (MATLAB 5 files) 

* class_dtypes (derived from order code, MATLAB 5 files) 

* codecs (MATLAB 5 files) 

* uint16_codec (MATLAB 5 files) 

 

Another set of parameters are those that apply only to the current 

variable being read - the *header*: 

 

* header related variables (different for v4 and v5 mat files) 

* is_complex 

* mclass 

* var_stream 

 

With the header, we need ``next_position`` to tell us where the next 

variable in the stream is. 

 

Then, for each element in a matrix, there can be *element read 

parameters*. An element is, for example, one element in a MATLAB cell 

array. At the moment these are: 

 

* mat_dtype 

 

The file-reading object contains the *file read parameters*. The 

*header* is passed around as a data object, or may be read and discarded 

in a single function. The *element read parameters* - the mat_dtype in 

this instance - are passed into a general post-processing function - see 

``mio_utils`` for details. 

''' 

 

 

def convert_dtypes(dtype_template, order_code):
    """Return a copy of a dtype mapping with every value in one byte order.

    Parameters
    ----------
    dtype_template : mapping
        Mapping whose values are acceptable to ``np.dtype(val)``.  The
        mapping is copied; the template itself is left untouched.
    order_code : str
        Byte order code suitable for ``dtype.newbyteorder()``.

    Returns
    -------
    dtypes : mapping
        Copy of `dtype_template` in which each value has been replaced by
        ``np.dtype(val).newbyteorder(order_code)``.
    """
    converted = dtype_template.copy()
    for key in dtype_template:
        as_dtype = np.dtype(dtype_template[key])
        converted[key] = as_dtype.newbyteorder(order_code)
    return converted

 

 

def read_dtype(mat_stream, a_dtype):
    """
    Read one item of a known dtype from a byte stream

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
        from its current position.
    a_dtype : dtype
        dtype of the item to read.  `a_dtype` is assumed to already have
        the correct endianness.

    Returns
    -------
    arr : ndarray
        Zero-dimensional array of dtype `a_dtype`, built directly on the
        bytes returned by the stream.

    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')

 

 

def get_matfile_version(fileobj):
    """
    Return major, minor tuple depending on apparent mat file type

    Where:

     #. 0,x -> version 4 format mat files
     #. 1,x -> version 5 format mat files
     #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    fileobj : file_like
        object implementing seek() and read(), returning bytes

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty, or too short to hold the bytes needed to
        identify its version.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Mat4 files have a zero somewhere in first 4 bytes
    fileobj.seek(0)
    mopt_bytes = fileobj.read(4)
    fileobj.seek(0)
    if len(mopt_bytes) == 0:
        raise MatReadError("Mat file appears to be empty")
    if len(mopt_bytes) < 4:
        # A short read cannot be classified; fail with a mat-specific error
        # rather than an unrelated low-level exception.
        raise MatReadError("Mat file appears to be truncated")
    # bytearray elements are integers on both Python 2 and Python 3
    if 0 in bytearray(mopt_bytes):
        return (0, 0)
    # For 5 format or 7.3 format we need to read an integer in the
    # header. Bytes 124 through 128 contain a version integer and an
    # endian test string
    fileobj.seek(124)
    tst_str = bytearray(fileobj.read(4))
    fileobj.seek(0)
    if len(tst_str) < 4:
        raise MatReadError("Mat file appears to be truncated")
    # The endian test string is stored as 'IM' or 'MI'; which one it is
    # tells us which of the first two bytes holds the major version.
    maj_ind = int(tst_str[2] == ord(b'I'))
    maj_val = tst_str[maj_ind]
    min_val = tst_str[1 - maj_ind]
    ret = (maj_val, min_val)
    if maj_val in (1, 2):
        return ret
    raise ValueError('Unknown mat file type, version %s, %s' % ret)

 

 

def matdims(arr, oned_as='column'):
    """
    Work out the MATLAB shape that corresponds to a numpy array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'.  Only consulted for non-empty 1-D input.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Notes
    -----
    We had to decide what shape a 1 dimensional array would be by
    default.  ``np.atleast_2d`` thinks it is a row vector.  The
    default for a vector in MATLAB (e.g. ``>> 1:12``) is a row vector.

    Versions of scipy up to and including 0.11 resulted (accidentally)
    in 1-D arrays being read as column vectors.  For the moment, we
    maintain the same tradition here.

    Examples
    --------
    >>> matdims(np.array(1)) # numpy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1d array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1d array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2d array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2d array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3d array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1d array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2d
    (0, 0)
    >>> matdims(np.array([[[]]])) # empty 3d
    (0, 0, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1d array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1D option "bizarre" is strange

    """
    dims = arr.shape
    if not dims:  # scalar
        return (1, 1)
    if 0 in dims:  # zero elements
        return (0,) * max(arr.ndim, 2)
    if len(dims) > 1:  # already 2-D or more
        return dims
    # Exactly one dimension from here on
    if oned_as == 'column':
        return dims + (1,)
    if oned_as == 'row':
        return (1,) + dims
    raise ValueError('1D option "%s" is strange'
                     % oned_as)

 

 

class MatVarReader(object):
    """Abstract class spelling out the interface required of var readers.

    Every method here is a placeholder that does nothing and returns None.
    """

    def __init__(self, file_reader):
        """Accept `file_reader`; this base implementation does not use it."""

    def read_header(self):
        """Return the header (this base implementation returns None)."""

    def array_from_header(self, header):
        """Read the array for `header` (this base implementation returns None)."""

 

 

class MatFileReader(object):
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory   - gives object to fetch next matrix from stream
    guess_byte_order        - guesses file byte order from file

    The initializer stores the read options on the instance as
    ``mat_stream``, ``dtypes``, ``byte_order``, ``struct_as_record``,
    ``squeeze_me``, ``chars_as_strings``, ``mat_dtype`` and
    ``verify_compressed_data_integrity``.
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True
                 ):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
        %(load_args)s
        %(struct_arg)s
        verify_compressed_data_integrity : bool, optional
            Stored on the instance unchanged; this base class does not
            otherwise use it.
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        self.dtypes = {}
        # Any falsy byte_order (None, '') means "work it out".  The stream
        # is already assigned, so an overriding guess_byte_order can use it.
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        if matlab_compatible:
            # Takes precedence over the squeeze_me, chars_as_strings and
            # mat_dtype arguments.
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them

        Sets ``mat_dtype`` to True and both ``squeeze_me`` and
        ``chars_as_strings`` to False; ``struct_as_record`` is not changed.
        '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if the stream has no more bytes to read

        The stream position is the same after the call as before it.
        '''
        b = self.mat_stream.read(1)
        if not b:
            # Nothing was consumed, so there is nothing to rewind.  Seeking
            # back regardless would step one byte before the end (or to a
            # negative offset on an empty stream).
            return True
        # Put back the single byte that was read as a probe
        self.mat_stream.seek(self.mat_stream.tell() - 1)
        return False

 

 

def arr_dtype_number(arr, num):
    ''' Return dtype for given number of items per element

    Parameters
    ----------
    arr : ndarray
        Array whose dtype supplies the first two characters of the type
        string (``arr.dtype.str[:2]``).
    num : int
        Number appended to those two characters to form the new dtype.

    Returns
    -------
    dtype : dtype
        ``np.dtype(arr.dtype.str[:2] + str(num))``
    '''
    return np.dtype(arr.dtype.str[:2] + str(num))


def arr_to_chars(arr):
    ''' Convert string array to char array

    Parameters
    ----------
    arr : ndarray
        Array of fixed-width strings.  A zero-dimensional array is treated
        as having shape ``(1,)``.

    Returns
    -------
    chars : ndarray
        Array of one-character items with one extra trailing axis whose
        length is the number taken from ``arr.dtype.str[2:]``.  Empty items
        are replaced by a single space.  If there were none to replace,
        the result is built on the input's buffer rather than a copy.
    '''
    dims = list(arr.shape)
    if not dims:
        dims = [1]
    dims.append(int(arr.dtype.str[2:]))
    # Reinterpret the same memory as one-character items
    arr = np.ndarray(shape=dims,
                     dtype=arr_dtype_number(arr, 1),
                     buffer=arr)
    # Compare against an empty string of the array's own dtype so the test
    # works for both bytes and unicode arrays, and keep the result as a
    # plain boolean mask (a list wrapping the mask is not a valid index).
    empties = arr == np.array('', dtype=arr.dtype)
    if not np.any(empties):
        return arr
    # Copy before writing so the caller's data is left alone
    arr = arr.copy()
    arr[empties] = ' '
    return arr