''' Classes for read / write of matlab (TM) 5 files 

 

The matfile specification last found here: 

 

http://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf 

 

(as of December 5 2008) 

''' 

from __future__ import division, print_function, absolute_import 

 

''' 

================================= 

Note on functions and mat files 

================================= 

 

The document above does not give any hints as to the storage of matlab 

function handles, or anonymous function handles. I had therefore to 

guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and 

``mxOPAQUE_CLASS`` by looking at example mat files. 

 

``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to 

contain a struct matrix with a set pattern of fields. For anonymous 

functions, a sub-field of one of these fields seems to contain the

well-named ``mxOPAQUE_CLASS``. This seems to contain:

 

* array flags as for any matlab matrix 

* 3 int8 strings 

* a matrix 

 

It seems that, whenever the mat file contains a ``mxOPAQUE_CLASS`` 

instance, there is also an un-named matrix (name == '') at the end of 

the mat file. I'll call this the ``__function_workspace__`` matrix. 

 

When I saved two anonymous functions in a mat file, or appended another 

anonymous function to the mat file, there was still only one 

``__function_workspace__`` un-named matrix at the end, but larger than 

that for a mat file with a single anonymous function, suggesting that 

the workspaces for the two functions had been merged. 

 

The ``__function_workspace__`` matrix appears to be of double class 

(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in 

the format of a mini .mat file, without the first 124 bytes of the file 

header (the description and the subsystem_offset), but with the version 

U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes, 

presumably for 8 byte padding, and then a series of ``miMATRIX`` 

entries, as in a standard mat file. The ``miMATRIX`` entries appear to 

be a series of un-named (name == '') matrices, and may also contain arrays

of this same mini-mat format. 

 

I guess that: 

 

* saving an anonymous function back to a mat file will need the 

associated ``__function_workspace__`` matrix saved as well for the 

anonymous function to work correctly. 

* appending to a mat file that has a ``__function_workspace__`` would 

involve first pulling off this workspace, appending, checking whether 

there were any more anonymous functions appended, and then somehow 

merging the relevant workspaces, and saving at the end of the mat 

file. 

 

The mat files I was playing with are in ``tests/data``: 

 

* sqr.mat 

* parabola.mat 

* some_functions.mat 

 

See ``tests/test_mio.py:test_mio_funcs.py`` for a debugging 

script I was working with. 

 

''' 

 

# Small fragments of current code adapted from matfile.py by Heiko 

# Henkelmann 

 

import os 

import time 

import sys 

import zlib 

 

from io import BytesIO 

 

import warnings 

 

import numpy as np 

from numpy.compat import asbytes, asstr 

 

import scipy.sparse 

 

from scipy._lib.six import string_types 

 

from .byteordercodes import native_code, swapped_code 

 

from .miobase import (MatFileReader, docfiller, matdims, read_dtype, 

arr_to_chars, arr_dtype_number, MatWriteError, 

MatReadError, MatReadWarning) 

 

# Reader object for matlab 5 format variables 

from .mio5_utils import VarReader5 

 

# Constants and helper objects 

from .mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES, 

NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8, 

miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS, 

mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS, 

mxDOUBLE_CLASS, mclass_info) 

 

from .streams import ZlibInputStream 
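
# The sketch below only illustrates the "Note on functions and mat files"
# docstring above: it re-frames the raw bytes of a ``__function_workspace__``
# uint8 array as the start of a mini mat stream and checks the first tag.
# The helper name is made up for illustration and is not part of scipy's API;
# the byte layout assumed here is exactly the one guessed at in that note.
def _peek_function_workspace(workspace_arr):
    # Per the note above: 2 version bytes, 2 endian-test bytes ('IM' or 'MI'),
    # 4 zero pad bytes, then ordinary miMATRIX tag / data entries.
    data = workspace_arr.tostring()
    endian = '<' if data[2:4] == b'IM' else '>'
    # First full tag after the 8 header / pad bytes: uint32 mdtype, uint32 count
    mdtype, byte_count = np.frombuffer(data[8:16], dtype=endian + 'u4')
    return endian, int(mdtype) == miMATRIX, int(byte_count)
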

 

 

class MatFile5Reader(MatFileReader): 

''' Reader for Mat 5 mat files 

Adds the following attribute to the base class:

 

uint16_codec - char codec to use for uint16 char arrays 

(defaults to system default codec) 

 

Uses a variable reader that has the following standard interface (see

the abstract class in ``miobase``)::

 

__init__(self, file_reader) 

read_header(self) 

array_from_header(self) 

 

and added interface:: 

 

set_stream(self, stream) 

read_full_tag(self) 

 

''' 

@docfiller 

def __init__(self, 

mat_stream, 

byte_order=None, 

mat_dtype=False, 

squeeze_me=False, 

chars_as_strings=True, 

matlab_compatible=False, 

struct_as_record=True, 

verify_compressed_data_integrity=True, 

uint16_codec=None 

): 

'''Initializer for matlab 5 file format reader 

 

%(matstream_arg)s 

%(load_args)s 

%(struct_arg)s 

uint16_codec : {None, string} 

Set codec to use for uint16 char arrays (e.g. 'utf-8'). 

Use system default codec if None 

''' 

super(MatFile5Reader, self).__init__( 

mat_stream, 

byte_order, 

mat_dtype, 

squeeze_me, 

chars_as_strings, 

matlab_compatible, 

struct_as_record, 

verify_compressed_data_integrity 

) 

# Set uint16 codec 

if not uint16_codec: 

uint16_codec = sys.getdefaultencoding() 

self.uint16_codec = uint16_codec 

# placeholders for readers - see initialize_read method 

self._file_reader = None 

self._matrix_reader = None 

 

def guess_byte_order(self): 

''' Guess byte order. 

Sets stream pointer to 0 ''' 

self.mat_stream.seek(126) 

mi = self.mat_stream.read(2) 

self.mat_stream.seek(0) 

return '<' if mi == b'IM' else '>'

 

def read_file_header(self): 

''' Read in mat 5 file header ''' 

hdict = {} 

hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header'] 

hdr = read_dtype(self.mat_stream, hdr_dtype) 

hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000') 

v_major = hdr['version'] >> 8 

v_minor = hdr['version'] & 0xFF 

hdict['__version__'] = '%d.%d' % (v_major, v_minor) 

return hdict 

 

def initialize_read(self): 

''' Run when beginning read of variables 

 

Sets up readers from parameters in `self` 

''' 

# reader for top level stream. We need this extra top-level 

# reader because we use the matrix_reader object to contain 

# compressed matrices (so they have their own stream) 

self._file_reader = VarReader5(self) 

# reader for matrix streams 

self._matrix_reader = VarReader5(self) 

 

def read_var_header(self): 

''' Read header, return header, next position 

 

Header has to define at least .name and .is_global 

 

Parameters 

---------- 

None 

 

Returns 

------- 

header : object 

object that can be passed to self.read_var_array, and that 

has attributes .name and .is_global 

next_position : int 

position in stream of next variable 

''' 

mdtype, byte_count = self._file_reader.read_full_tag() 

if not byte_count > 0: 

raise ValueError("Did not read any bytes") 

next_pos = self.mat_stream.tell() + byte_count 

if mdtype == miCOMPRESSED: 

# Make new stream from compressed data 

stream = ZlibInputStream(self.mat_stream, byte_count) 

self._matrix_reader.set_stream(stream) 

check_stream_limit = self.verify_compressed_data_integrity 

mdtype, byte_count = self._matrix_reader.read_full_tag() 

else: 

check_stream_limit = False 

self._matrix_reader.set_stream(self.mat_stream) 

if mdtype != miMATRIX:

raise TypeError('Expecting miMATRIX type here, got %d' % mdtype) 

header = self._matrix_reader.read_header(check_stream_limit) 

return header, next_pos 

 

def read_var_array(self, header, process=True): 

''' Read array, given `header` 

 

Parameters 

---------- 

header : header object 

object with fields defining variable header 

process : {True, False} bool, optional 

If True, apply recursive post-processing during loading of 

array. 

 

Returns 

------- 

arr : array 

array with post-processing applied or not according to 

`process`. 

''' 

return self._matrix_reader.array_from_header(header, process) 

 

def get_variables(self, variable_names=None): 

''' get variables from stream as dictionary 

 

variable_names - optional list of variable names to get 

 

If variable_names is None, then get all variables in file 

''' 

if isinstance(variable_names, string_types): 

variable_names = [variable_names] 

elif variable_names is not None: 

variable_names = list(variable_names) 

 

self.mat_stream.seek(0) 

# Here we pass all the parameters in self to the reading objects 

self.initialize_read() 

mdict = self.read_file_header() 

mdict['__globals__'] = [] 

while not self.end_of_stream(): 

hdr, next_position = self.read_var_header() 

name = asstr(hdr.name) 

if name in mdict: 

warnings.warn('Duplicate variable name "%s" in stream' 

' - replacing previous with new\n' 

'Consider mio5.varmats_from_mat to split ' 

'file into single variable files' % name, 

MatReadWarning, stacklevel=2) 

if name == '': 

# can only be a matlab 7 function workspace 

name = '__function_workspace__' 

# We want to keep this raw because mat_dtype processing 

# will break the format (uint8 as mxDOUBLE_CLASS) 

process = False 

else: 

process = True 

if variable_names is not None and name not in variable_names: 

self.mat_stream.seek(next_position) 

continue 

try: 

res = self.read_var_array(hdr, process) 

except MatReadError as err: 

warnings.warn( 

'Unreadable variable "%s", because "%s"' % 

(name, err), 

Warning, stacklevel=2) 

res = "Read error: %s" % err 

self.mat_stream.seek(next_position) 

mdict[name] = res 

if hdr.is_global: 

mdict['__globals__'].append(name) 

if variable_names is not None: 

variable_names.remove(name) 

if len(variable_names) == 0: 

break 

return mdict 

 

def list_variables(self): 

''' list variables from stream ''' 

self.mat_stream.seek(0) 

# Here we pass all the parameters in self to the reading objects 

self.initialize_read() 

self.read_file_header() 

vars = [] 

while not self.end_of_stream(): 

hdr, next_position = self.read_var_header() 

name = asstr(hdr.name) 

if name == '': 

# can only be a matlab 7 function workspace 

name = '__function_workspace__' 

 

shape = self._matrix_reader.shape_from_header(hdr) 

if hdr.is_logical: 

info = 'logical' 

else: 

info = mclass_info.get(hdr.mclass, 'unknown') 

vars.append((name, shape, info)) 

 

self.mat_stream.seek(next_position) 

return vars 
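
# A minimal usage sketch of the reader above.  ``_example_read_all`` is an
# illustrative name only; `file_like` is assumed to be an open binary file or
# BytesIO containing a mat 5 file.
def _example_read_all(file_like):
    # Construct a reader with default options, list the variables, then read
    # them all into a dict (plus '__header__', '__version__', '__globals__').
    # Both methods rewind the stream themselves, so they can be called in turn.
    rdr = MatFile5Reader(file_like)
    names = [name for name, shape, info in rdr.list_variables()]
    return names, rdr.get_variables()
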

 

 

def varmats_from_mat(file_obj): 

""" Pull variables out of mat 5 file as a sequence of mat file objects 

 

This can be useful with a difficult mat file, containing unreadable 

variables. This routine pulls the variables out in raw form and puts them, 

unread, back into a file stream for saving or reading. Another use is the 

pathological case where there is more than one variable of the same name in 

the file; this routine returns the duplicates, whereas the standard reader 

will overwrite duplicates in the returned dictionary. 

 

The file pointer in `file_obj` will be undefined. File pointers for the 

returned file-like objects are set at 0. 

 

Parameters 

---------- 

file_obj : file-like 

file object containing mat file 

 

Returns 

------- 

named_mats : list 

list contains tuples of (name, BytesIO) where BytesIO is a file-like 

object containing mat file contents as for a single variable. The 

BytesIO contains a string with the original header and a single var. If 

``var_file_obj`` is an individual BytesIO instance, then save as a mat 

file with something like ``open('test.mat', 

'wb').write(var_file_obj.read())`` 

 

Examples 

-------- 

>>> import scipy.io 

 

``BytesIO`` is from the ``io`` module:

>>> from io import BytesIO

>>> import numpy as np

 

>>> mat_fileobj = BytesIO() 

>>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'}) 

>>> varmats = varmats_from_mat(mat_fileobj) 

>>> sorted([name for name, str_obj in varmats]) 

['a', 'b'] 

""" 

rdr = MatFile5Reader(file_obj) 

file_obj.seek(0) 

# Raw read of top-level file header 

hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize 

raw_hdr = file_obj.read(hdr_len) 

# Initialize variable reading 

file_obj.seek(0) 

rdr.initialize_read() 

mdict = rdr.read_file_header() 

next_position = file_obj.tell() 

named_mats = [] 

while not rdr.end_of_stream(): 

start_position = next_position 

hdr, next_position = rdr.read_var_header() 

name = asstr(hdr.name) 

# Read raw variable string 

file_obj.seek(start_position) 

byte_count = next_position - start_position 

var_str = file_obj.read(byte_count) 

# write to stringio object 

out_obj = BytesIO() 

out_obj.write(raw_hdr) 

out_obj.write(var_str) 

out_obj.seek(0) 

named_mats.append((name, out_obj)) 

return named_mats 
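
# A minimal sketch, assuming (as the docstring above states) that each
# single-variable BytesIO returned by varmats_from_mat is itself a complete
# mat file: split a multi-variable mat file into a {name: loadmat-result}
# dict.  ``_split_mat_file`` is an illustrative name, not part of scipy's API.
def _split_mat_file(file_obj):
    from scipy.io import loadmat
    return dict((name, loadmat(var_fobj))
                for name, var_fobj in varmats_from_mat(file_obj))
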

 

 

class EmptyStructMarker(object): 

""" Class to indicate presence of empty matlab struct on output """ 

 

 

def to_writeable(source): 

''' Convert input object ``source`` to something we can write 

 

Parameters 

---------- 

source : object 

 

Returns 

------- 

arr : None or ndarray or EmptyStructMarker 

If `source` cannot be converted to something we can write to a matfile, 

return None. If `source` is equivalent to an empty dictionary, return 

``EmptyStructMarker``. Otherwise return `source` converted to an 

ndarray with contents for writing to matfile. 

''' 

if isinstance(source, np.ndarray): 

return source 

if source is None: 

return None 

# Objects that implement mappings 

is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and 

hasattr(source, 'items')) 

# Objects that don't implement mappings, but do have dicts 

if isinstance(source, np.generic): 

# Numpy scalars are never mappings (pypy issue workaround) 

pass 

elif not is_mapping and hasattr(source, '__dict__'): 

source = dict((key, value) for key, value in source.__dict__.items() 

if not key.startswith('_')) 

is_mapping = True 

if is_mapping: 

dtype = [] 

values = [] 

for field, value in source.items(): 

if (isinstance(field, string_types) and 

field[0] not in '_0123456789'): 

dtype.append((str(field), object)) 

values.append(value) 

if dtype: 

return np.array([tuple(values)], dtype) 

else: 

return EmptyStructMarker 

# Next try and convert to an array 

narr = np.asanyarray(source) 

if narr.dtype.type in (object, np.object_) and \ 

narr.shape == () and narr == source: 

# No interesting conversion possible 

return None 

return narr 
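
# Illustrative sketch of the conversion rules implemented by ``to_writeable``
# above; the function name is made up and the asserts simply restate the
# docstring and the code paths above.
def _to_writeable_examples():
    assert isinstance(to_writeable(np.zeros(3)), np.ndarray)  # ndarrays pass through
    assert to_writeable(None) is None                         # cannot be written
    assert to_writeable({}) is EmptyStructMarker              # empty struct marker
    rec = to_writeable({'a': 1, '_hidden': 2})                # mapping -> record array
    assert rec.dtype.names == ('a',)                          # leading '_' fields dropped
    assert to_writeable(object()) is None                     # no useful conversion
    return rec
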

 

 

# Native byte ordered dtypes for convenience for writers 

NDT_FILE_HDR = MDTYPES[native_code]['dtypes']['file_header'] 

NDT_TAG_FULL = MDTYPES[native_code]['dtypes']['tag_full'] 

NDT_TAG_SMALL = MDTYPES[native_code]['dtypes']['tag_smalldata'] 

NDT_ARRAY_FLAGS = MDTYPES[native_code]['dtypes']['array_flags'] 

 

 

class VarWriter5(object): 

''' Generic matlab matrix writing class ''' 

mat_tag = np.zeros((), NDT_TAG_FULL) 

mat_tag['mdtype'] = miMATRIX 

 

def __init__(self, file_writer): 

self.file_stream = file_writer.file_stream 

self.unicode_strings = file_writer.unicode_strings 

self.long_field_names = file_writer.long_field_names 

self.oned_as = file_writer.oned_as 

# These are used for top level writes, and unset after 

self._var_name = None 

self._var_is_global = False 

 

def write_bytes(self, arr): 

self.file_stream.write(arr.tostring(order='F')) 

 

def write_string(self, s): 

self.file_stream.write(s) 

 

def write_element(self, arr, mdtype=None): 

''' write tag and data ''' 

if mdtype is None: 

mdtype = NP_TO_MTYPES[arr.dtype.str[1:]] 

# Array needs to be in native byte order 

if arr.dtype.byteorder == swapped_code: 

arr = arr.byteswap().newbyteorder() 

byte_count = arr.size*arr.itemsize 

if byte_count <= 4: 

self.write_smalldata_element(arr, mdtype, byte_count) 

else: 

self.write_regular_element(arr, mdtype, byte_count) 

 

def write_smalldata_element(self, arr, mdtype, byte_count): 

# write tag with embedded data 

tag = np.zeros((), NDT_TAG_SMALL) 

tag['byte_count_mdtype'] = (byte_count << 16) + mdtype 

# if the data from arr.tostring() is < 4 bytes, the element will be zero-padded as needed.

tag['data'] = arr.tostring(order='F') 

self.write_bytes(tag) 

 

def write_regular_element(self, arr, mdtype, byte_count): 

# write tag, data 

tag = np.zeros((), NDT_TAG_FULL) 

tag['mdtype'] = mdtype 

tag['byte_count'] = byte_count 

self.write_bytes(tag) 

self.write_bytes(arr) 

# pad to next 64-bit boundary 

bc_mod_8 = byte_count % 8 

if bc_mod_8: 

self.file_stream.write(b'\x00' * (8-bc_mod_8)) 

 

def write_header(self, 

shape, 

mclass, 

is_complex=False, 

is_logical=False, 

nzmax=0): 

''' Write header for given data options 

shape - sequence of array dimensions

mclass - mat5 matrix class

is_complex - True if matrix is complex

is_logical - True if matrix is logical

nzmax - max non zero elements for sparse arrays

 

We get the name and the global flag from the object, and reset 

them to defaults after we've used them 

''' 

# get name and is_global from one-shot object store 

name = self._var_name 

is_global = self._var_is_global 

# initialize the top-level matrix tag, store position 

self._mat_tag_pos = self.file_stream.tell() 

self.write_bytes(self.mat_tag) 

# write array flags (complex, global, logical, class, nzmax) 

af = np.zeros((), NDT_ARRAY_FLAGS) 

af['data_type'] = miUINT32 

af['byte_count'] = 8 

flags = is_complex << 3 | is_global << 2 | is_logical << 1 

af['flags_class'] = mclass | flags << 8 

af['nzmax'] = nzmax 

self.write_bytes(af) 

# shape 

self.write_element(np.array(shape, dtype='i4')) 

# write name 

name = np.asarray(name) 

if name == '': # empty string zero-terminated 

self.write_smalldata_element(name, miINT8, 0) 

else: 

self.write_element(name, miINT8) 

# reset the one-shot store to defaults 

self._var_name = '' 

self._var_is_global = False 

 

def update_matrix_tag(self, start_pos): 

curr_pos = self.file_stream.tell() 

self.file_stream.seek(start_pos) 

byte_count = curr_pos - start_pos - 8 

if byte_count >= 2**32: 

raise MatWriteError("Matrix too large to save with Matlab " 

"5 format") 

self.mat_tag['byte_count'] = byte_count 

self.write_bytes(self.mat_tag) 

self.file_stream.seek(curr_pos) 

 

def write_top(self, arr, name, is_global): 

""" Write variable at top level of mat file 

 

Parameters 

---------- 

arr : array_like 

array-like object to create writer for 

name : str

name as it will appear in matlab workspace

is_global : bool

whether variable will be global on load into matlab 

""" 

# these are set before the top-level header write, and unset at 

# the end of the same write, because they do not apply for lower levels 

self._var_is_global = is_global 

self._var_name = name 

# write the header and data 

self.write(arr) 

 

def write(self, arr): 

''' Write `arr` to stream at top and sub levels 

 

Parameters 

---------- 

arr : array_like 

array-like object to create writer for 

''' 

# store position, so we can update the matrix tag 

mat_tag_pos = self.file_stream.tell() 

# First check if these are sparse 

if scipy.sparse.issparse(arr): 

self.write_sparse(arr) 

self.update_matrix_tag(mat_tag_pos) 

return 

# Try to convert things that aren't arrays 

narr = to_writeable(arr) 

if narr is None: 

raise TypeError('Could not convert %s (type %s) to array' 

% (arr, type(arr))) 

if isinstance(narr, MatlabObject): 

self.write_object(narr) 

elif isinstance(narr, MatlabFunction): 

raise MatWriteError('Cannot write matlab functions') 

elif narr is EmptyStructMarker: # empty struct array 

self.write_empty_struct() 

elif narr.dtype.fields: # struct array 

self.write_struct(narr) 

elif narr.dtype.hasobject: # cell array 

self.write_cells(narr) 

elif narr.dtype.kind in ('U', 'S'): 

if self.unicode_strings: 

codec = 'UTF8' 

else: 

codec = 'ascii' 

self.write_char(narr, codec) 

else: 

self.write_numeric(narr) 

self.update_matrix_tag(mat_tag_pos) 

 

def write_numeric(self, arr): 

imagf = arr.dtype.kind == 'c' 

logif = arr.dtype.kind == 'b' 

try: 

mclass = NP_TO_MXTYPES[arr.dtype.str[1:]] 

except KeyError: 

# No matching matlab type, probably complex256 / float128 / float96 

# Cast data to complex128 / float64. 

if imagf: 

arr = arr.astype('c128') 

elif logif: 

arr = arr.astype('i1') # Should only contain 0/1 

else: 

arr = arr.astype('f8') 

mclass = mxDOUBLE_CLASS 

self.write_header(matdims(arr, self.oned_as), 

mclass, 

is_complex=imagf, 

is_logical=logif) 

if imagf: 

self.write_element(arr.real) 

self.write_element(arr.imag) 

else: 

self.write_element(arr) 

 

def write_char(self, arr, codec='ascii'): 

''' Write string array `arr` with given `codec` 

''' 

if arr.size == 0 or np.all(arr == ''): 

# This is an empty string array or a string array containing

# only empty strings. Matlab cannot distinguish between a

# string array that is empty, and a string array containing 

# only empty strings, because it stores strings as arrays of 

# char. There is no way of having an array of char that is 

# not empty, but contains an empty string. We have to 

# special-case the array-with-empty-strings because even 

# empty strings have zero padding, which would otherwise 

# appear in matlab as a string with a space. 

shape = (0,) * np.max([arr.ndim, 2]) 

self.write_header(shape, mxCHAR_CLASS) 

self.write_smalldata_element(arr, miUTF8, 0) 

return 

# non-empty string. 

# 

# Convert to char array 

arr = arr_to_chars(arr) 

# We have to write the shape directly, because we are going to

# recode the characters, and the resulting stream of chars 

# may have a different length 

shape = arr.shape 

self.write_header(shape, mxCHAR_CLASS) 

if arr.dtype.kind == 'U' and arr.size: 

# Make one long string from all the characters. We need to 

# transpose here, because we're flattening the array, before 

# we write the bytes. The bytes have to be written in 

# Fortran order. 

n_chars = np.product(shape) 

st_arr = np.ndarray(shape=(), 

dtype=arr_dtype_number(arr, n_chars), 

buffer=arr.T.copy()) # Fortran order 

# Recode with codec to give byte string 

st = st_arr.item().encode(codec) 

# Reconstruct as one-dimensional byte array 

arr = np.ndarray(shape=(len(st),), 

dtype='S1', 

buffer=st) 

self.write_element(arr, mdtype=miUTF8) 

 

def write_sparse(self, arr): 

''' Sparse matrices are 2D 

''' 

A = arr.tocsc() # convert to sparse CSC format 

A.sort_indices() # MATLAB expects sorted row indices 

is_complex = (A.dtype.kind == 'c') 

is_logical = (A.dtype.kind == 'b') 

nz = A.nnz 

self.write_header(matdims(arr, self.oned_as), 

mxSPARSE_CLASS, 

is_complex=is_complex, 

is_logical=is_logical, 

# matlab won't load file with 0 nzmax 

nzmax=1 if nz == 0 else nz) 

self.write_element(A.indices.astype('i4')) 

self.write_element(A.indptr.astype('i4')) 

self.write_element(A.data.real) 

if is_complex: 

self.write_element(A.data.imag) 

 

def write_cells(self, arr): 

self.write_header(matdims(arr, self.oned_as), 

mxCELL_CLASS) 

# loop over data, column major 

A = np.atleast_2d(arr).flatten('F') 

for el in A: 

self.write(el) 

 

def write_empty_struct(self): 

self.write_header((1, 1), mxSTRUCT_CLASS) 

# max field name length set to 1 in an example matlab struct 

self.write_element(np.array(1, dtype=np.int32)) 

# Field names element is empty 

self.write_element(np.array([], dtype=np.int8)) 

 

def write_struct(self, arr): 

self.write_header(matdims(arr, self.oned_as), 

mxSTRUCT_CLASS) 

self._write_items(arr) 

 

def _write_items(self, arr): 

# write fieldnames 

fieldnames = [f[0] for f in arr.dtype.descr] 

length = max([len(fieldname) for fieldname in fieldnames])+1 

max_length = 64 if self.long_field_names else 32

if length > max_length: 

raise ValueError("Field names are restricted to %d characters" % 

(max_length-1)) 

self.write_element(np.array([length], dtype='i4')) 

self.write_element( 

np.array(fieldnames, dtype='S%d' % (length)), 

mdtype=miINT8) 

A = np.atleast_2d(arr).flatten('F') 

for el in A: 

for f in fieldnames: 

self.write(el[f]) 

 

def write_object(self, arr): 

'''Same as writing structs, except different mx class, and extra 

classname element after header 

''' 

self.write_header(matdims(arr, self.oned_as), 

mxOBJECT_CLASS) 

self.write_element(np.array(arr.classname, dtype='S'), 

mdtype=miINT8) 

self._write_items(arr) 
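
# A minimal decoding sketch for the "small data element" format produced by
# ``VarWriter5.write_smalldata_element`` above (native byte order assumed).
# ``_unpack_small_tag`` is an illustrative name, not scipy API.
def _unpack_small_tag(tag_bytes):
    # The low 16 bits of the first uint32 carry the mdtype, the high 16 bits
    # the byte count; the remaining 4 bytes hold the zero-padded data.
    packed = int(np.frombuffer(tag_bytes[:4], dtype='u4')[0])
    mdtype = packed & 0xffff
    byte_count = packed >> 16
    return mdtype, byte_count, tag_bytes[4:4 + byte_count]
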

 

 

class MatFile5Writer(object): 

''' Class for writing mat5 files ''' 

 

@docfiller 

def __init__(self, file_stream, 

do_compression=False, 

unicode_strings=False, 

global_vars=None, 

long_field_names=False, 

oned_as='row'): 

''' Initialize writer for matlab 5 format files 

 

Parameters 

---------- 

%(do_compression)s 

%(unicode_strings)s 

global_vars : None or sequence of strings, optional 

Names of variables to be marked as global for matlab 

%(long_fields)s 

%(oned_as)s 

''' 

self.file_stream = file_stream 

self.do_compression = do_compression 

self.unicode_strings = unicode_strings 

if global_vars: 

self.global_vars = global_vars 

else: 

self.global_vars = [] 

self.long_field_names = long_field_names 

self.oned_as = oned_as 

self._matrix_writer = None 

 

def write_file_header(self): 

# write header 

hdr = np.zeros((), NDT_FILE_HDR) 

hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \ 

% (os.name,time.asctime()) 

hdr['version'] = 0x0100 

hdr['endian_test'] = np.ndarray(shape=(), 

dtype='S2', 

buffer=np.uint16(0x4d49)) 

self.file_stream.write(hdr.tostring()) 

 

def put_variables(self, mdict, write_header=None): 

''' Write variables in `mdict` to stream 

 

Parameters 

---------- 

mdict : mapping 

mapping whose ``items`` method returns (name, contents) pairs, where

``name`` will appear in the matlab workspace on file load, and

``contents`` is something writeable to a matlab file, such as a numpy 

array. 

write_header : {None, True, False}, optional 

If True, then write the matlab file header before writing the 

variables. If None (the default) then write the file header 

if we are at position 0 in the stream. By setting False 

here, and setting the stream position to the end of the file, 

you can append variables to a matlab file 

''' 

# write header if requested, or None and start of file 

if write_header is None: 

write_header = self.file_stream.tell() == 0 

if write_header: 

self.write_file_header() 

self._matrix_writer = VarWriter5(self) 

for name, var in mdict.items(): 

if name[0] == '_': 

continue 

is_global = name in self.global_vars 

if self.do_compression: 

stream = BytesIO() 

self._matrix_writer.file_stream = stream 

self._matrix_writer.write_top(var, asbytes(name), is_global) 

out_str = zlib.compress(stream.getvalue()) 

tag = np.empty((), NDT_TAG_FULL) 

tag['mdtype'] = miCOMPRESSED 

tag['byte_count'] = len(out_str) 

self.file_stream.write(tag.tostring()) 

self.file_stream.write(out_str) 

else: # not compressing 

self._matrix_writer.write_top(var, asbytes(name), is_global)
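
# A minimal round-trip sketch for the writer above, assuming in-memory
# streams; ``_example_round_trip`` is an illustrative name only.
def _example_round_trip():
    stream = BytesIO()
    writer = MatFile5Writer(stream, do_compression=True, oned_as='row')
    # First call writes the file header because the stream is at position 0
    writer.put_variables({'x': np.arange(5.), 'msg': 'hello'})
    # A second call appends without rewriting the header, as described in the
    # put_variables docstring
    writer.put_variables({'y': np.eye(2)})
    # Read everything back with the reader defined earlier in this module
    stream.seek(0)
    return MatFile5Reader(stream).get_variables()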