Coverage for /usr/lib/python3/dist-packages/scipy/io/idl.py: 11%

100

101

102

103

# IDLSave - a python module to read IDL 'save' files

# Many thanks to Craig Markwardt for publishing the Unofficial Format

# Specification for IDL .sav files, without which this Python module would not

# exist (http://cow.physics.wisc.edu/~craigm/idl/savefmt).

# This code was developed with permission from ITT Visual Information

# Systems. IDL(r) is a registered trademark of ITT Visual Information Systems,

# Inc. for their Interactive Data Language software.

# Permission is hereby granted, free of charge, to any person obtaining a

# copy of this software and associated documentation files (the "Software"),

# to deal in the Software without restriction, including without limitation

# the rights to use, copy, modify, merge, publish, distribute, sublicense,

# and/or sell copies of the Software, and to permit persons to whom the

# Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in

# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

# DEALINGS IN THE SOFTWARE.

from __future__ import division, print_function, absolute_import

import struct

import numpy as np

from numpy.compat import asstr

import tempfile

import zlib

import warnings

# Map each IDL type code that can appear in a save file to the NumPy dtype
# string used to hold it.  Numeric entries are big-endian ('>'); entries
# mapped to '|O' are stored as Python objects.
DTYPE_DICT = {
    1: '>u1',    # unsigned 1-byte integer
    2: '>i2',    # signed 2-byte integer
    3: '>i4',    # signed 4-byte integer
    4: '>f4',    # 4-byte float
    5: '>f8',    # 8-byte float
    6: '>c8',    # complex built from two 4-byte floats
    7: '|O',     # string data, kept as an object
    8: '|O',     # kept as an object
    9: '>c16',   # complex built from two 8-byte floats
    10: '|O',    # Pointer instance
    11: '|O',    # ObjectPointer instance
    12: '>u2',   # unsigned 2-byte integer
    13: '>u4',   # unsigned 4-byte integer
    14: '>i8',   # signed 8-byte integer
    15: '>u8',   # unsigned 8-byte integer
}

# Map the numeric record-type code stored at the start of each record to the
# symbolic name used throughout this module.
RECTYPE_DICT = {
    0: "START_MARKER",
    1: "COMMON_VARIABLE",
    2: "VARIABLE",
    3: "SYSTEM_VARIABLE",
    6: "END_MARKER",
    10: "TIMESTAMP",
    12: "COMPILED",
    13: "IDENTIFICATION",
    14: "VERSION",
    15: "HEAP_HEADER",
    16: "HEAP_DATA",
    17: "PROMOTE64",
    19: "NOTICE",
    20: "DESCRIPTION",
}

# Module-level registry of structure definitions, keyed by structure name.
# It is filled in by _read_structdesc and consulted when a later descriptor
# is flagged as predefined.
STRUCT_DICT = dict()

def _align_32(f):
    '''Advance `f` to the next multiple-of-4 offset, if not already on one.'''
    offset = f.tell()
    remainder = offset % 4
    if remainder != 0:
        # Jump forward over the padding bytes up to the 32-bit boundary.
        f.seek(offset + 4 - remainder)
    return None

def _skip_bytes(f, n):
    '''Consume and discard the next `n` bytes of `f`.'''
    # The bytes are read (not seeked over) and the result is thrown away.
    _ = f.read(n)
    return None

def _read_bytes(f, n):
    '''Return whatever `f.read(n)` yields for the next `n` bytes.'''
    chunk = f.read(n)
    return chunk

def _read_byte(f):
    '''Read one unsigned byte; four bytes are consumed and the first is used.'''
    word = f.read(4)
    (value,) = struct.unpack('>B', word[:1])
    return np.uint8(value)

def _read_long(f):
    '''Decode the next four bytes as a big-endian signed 32-bit integer.'''
    raw = f.read(4)
    (value,) = struct.unpack('>l', raw)
    return np.int32(value)

def _read_int16(f):
    '''Read a signed 16-bit integer from the last two bytes of a 4-byte word.'''
    word = f.read(4)
    (value,) = struct.unpack('>h', word[2:4])
    return np.int16(value)

def _read_int32(f):
    '''Decode the next four bytes as a big-endian signed 32-bit integer.'''
    raw = f.read(4)
    (value,) = struct.unpack('>i', raw)
    return np.int32(value)

def _read_int64(f):
    '''Decode the next eight bytes as a big-endian signed 64-bit integer.'''
    raw = f.read(8)
    (value,) = struct.unpack('>q', raw)
    return np.int64(value)

def _read_uint16(f):
    '''Read an unsigned 16-bit integer from the last two bytes of a 4-byte word.'''
    word = f.read(4)
    (value,) = struct.unpack('>H', word[2:4])
    return np.uint16(value)

def _read_uint32(f):
    '''Decode the next four bytes as a big-endian unsigned 32-bit integer.'''
    raw = f.read(4)
    (value,) = struct.unpack('>I', raw)
    return np.uint32(value)

def _read_uint64(f):
    '''Decode the next eight bytes as a big-endian unsigned 64-bit integer.'''
    raw = f.read(8)
    (value,) = struct.unpack('>Q', raw)
    return np.uint64(value)

def _read_float32(f):
    '''Decode the next four bytes as a big-endian 32-bit float.'''
    raw = f.read(4)
    (value,) = struct.unpack('>f', raw)
    return np.float32(value)

def _read_float64(f):
    '''Decode the next eight bytes as a big-endian 64-bit float.'''
    raw = f.read(8)
    (value,) = struct.unpack('>d', raw)
    return np.float64(value)

class Pointer(object):
    '''Placeholder for a pointer variable; `index` is the key later looked
    up in the heap by _replace_heap.'''

    def __init__(self, index):
        # Only the index is stored; resolution happens after all records
        # have been read.
        self.index = index

class ObjectPointer(Pointer):
    '''Marker subclass of Pointer used for object pointers.

    It adds no behaviour of its own.
    '''

def _read_string(f):
    '''Read a length-prefixed string and return it as a native `str`.

    A non-positive length yields the empty string without reading further.
    '''
    length = _read_long(f)
    if length <= 0:
        return ''

    raw = _read_bytes(f, length)
    # String payloads are padded up to the next 32-bit boundary.
    _align_32(f)
    return asstr(raw)

def _read_string_data(f):
    '''Read a data string, whose length is stored twice in the file.

    Returns the raw bytes as read (they are not decoded), or the empty
    `str` literal '' when the first length is not positive.
    '''
    if _read_long(f) <= 0:
        return ''

    # The second copy of the length is the one used for the actual read.
    length = _read_long(f)
    payload = _read_bytes(f, length)
    _align_32(f)
    return payload

def _read_data(f, dtype):
    '''Read one scalar value whose IDL type code is `dtype`.

    Raises Exception for type code 8, for an unknown type code, and when
    the leading count of a byte variable is not 1.
    '''
    # Byte scalars carry a leading count which must be exactly one.
    if dtype == 1:
        if _read_int32(f) != 1:
            raise Exception("Error occurred while reading byte variable")
        return _read_byte(f)

    # Complex values are stored as (real, imaginary) pairs of floats.
    if dtype == 6:
        real = _read_float32(f)
        imag = _read_float32(f)
        return np.complex64(real + imag * 1j)
    if dtype == 9:
        real = _read_float64(f)
        imag = _read_float64(f)
        return np.complex128(real + imag * 1j)

    if dtype == 8:
        raise Exception("Should not be here - please report this")

    # Pointers are kept as placeholders holding their 32-bit index.
    if dtype == 10:
        return Pointer(_read_int32(f))
    if dtype == 11:
        return ObjectPointer(_read_int32(f))

    # Every remaining known type is handled by a single reader call.
    simple_readers = (
        (2, _read_int16),
        (3, _read_int32),
        (4, _read_float32),
        (5, _read_float64),
        (7, _read_string_data),
        (12, _read_uint16),
        (13, _read_uint32),
        (14, _read_int64),
        (15, _read_uint64),
    )
    for code, reader in simple_readers:
        if dtype == code:
            return reader(f)

    raise Exception("Unknown IDL type: %i - please report this" % dtype)

def _read_structure(f, array_desc, struct_desc):
    '''
    Read a structure described by the array descriptor `array_desc` and the
    structure descriptor `struct_desc`, returning it as a record array.

    Raises Exception when a plain tag has a type code missing from
    DTYPE_DICT.
    '''
    nrows = array_desc['nelements']
    columns = struct_desc['tagtable']

    # Build the record dtype: each field gets the lower-cased tag name as
    # its title and the original tag name as its name.
    fields = []
    for col in columns:
        if col['structure'] or col['array']:
            # Nested structures and arrays are stored as objects.
            field_type = np.object_
        elif col['typecode'] in DTYPE_DICT:
            field_type = DTYPE_DICT[col['typecode']]
        else:
            raise Exception("Variable type %i not implemented" %
                            col['typecode'])
        fields.append(((col['name'].lower(), col['name']), field_type))

    structure = np.recarray((nrows, ), dtype=fields)

    # Values are laid out row by row, with the tags of a row in table order.
    for row in range(nrows):
        for col in columns:
            typecode = col['typecode']
            tag = col['name']
            if col['structure']:
                value = _read_structure(f,
                                        struct_desc['arrtable'][tag],
                                        struct_desc['structtable'][tag])
            elif col['array']:
                value = _read_array(f, typecode,
                                    struct_desc['arrtable'][tag])
            else:
                value = _read_data(f, typecode)
            structure[tag][row] = value

    # Reshape structure if needed; the stored dimensions are reversed
    # before being used as the NumPy shape.
    ndims = array_desc['ndims']
    if ndims > 1:
        shape = array_desc['dims'][:int(ndims)]
        shape.reverse()
        structure = structure.reshape(shape)

    return structure

def _read_array(f, typecode, array_desc):
    '''
    Read an array of IDL type `typecode` laid out as described by the array
    descriptor `array_desc`, leaving `f` on a 32-bit boundary.
    '''
    contiguous_codes = (1, 3, 4, 5, 6, 9, 13, 14, 15)
    padded_codes = (2, 12)

    if typecode in contiguous_codes:
        if typecode == 1:
            # Byte arrays repeat their size just before the data.
            declared = _read_int32(f)
            if declared != array_desc['nbytes']:
                warnings.warn("Not able to verify number of bytes from header")

        # Interpret the raw bytes directly as a NumPy array.
        raw = f.read(array_desc['nbytes'])
        array = np.frombuffer(raw, dtype=DTYPE_DICT[typecode])

    elif typecode in padded_codes:
        # Two-byte values are not packed: twice the bytes are read and only
        # every second item is kept.
        raw = f.read(array_desc['nbytes'] * 2)
        array = np.frombuffer(raw, dtype=DTYPE_DICT[typecode])[1::2]

    else:
        # Any other type is read element by element into an object array.
        items = [_read_data(f, typecode)
                 for _ in range(array_desc['nelements'])]
        array = np.array(items, dtype=np.object_)

    # Reshape array if needed; the stored dimensions are reversed before
    # being used as the NumPy shape.
    ndims = array_desc['ndims']
    if ndims > 1:
        shape = array_desc['dims'][:int(ndims)]
        shape.reverse()
        array = array.reshape(shape)

    # Go to next alignment position
    _align_32(f)

    return array

def _read_record(f):
    '''
    Read one full record from `f` and return it as a dictionary.

    The returned dictionary always has a 'rectype' key holding the symbolic
    name from RECTYPE_DICT; the remaining keys depend on the record type.
    On return `f` is positioned at the start of the next record.

    Raises Exception for an unknown or unimplemented record type or a bad
    VARSTART marker, and ValueError for an unexpected type code of 0.
    '''
    record = {'rectype': _read_long(f)}

    # The offset of the next record is stored as two unsigned 32-bit words,
    # low word first.  Combine them as Python ints: multiplying a NumPy
    # uint32 by 2**32 overflows under NumPy 2 promotion rules.
    nextrec = int(_read_uint32(f))
    nextrec += int(_read_uint32(f)) * 2**32

    _skip_bytes(f, 4)

    if record['rectype'] not in RECTYPE_DICT:
        raise Exception("Unknown RECTYPE: %i" % record['rectype'])

    record['rectype'] = RECTYPE_DICT[record['rectype']]
    rectype = record['rectype']

    if rectype in ("VARIABLE", "HEAP_DATA"):

        if rectype == "VARIABLE":
            record['varname'] = _read_string(f)
        else:
            record['heap_index'] = _read_long(f)
            _skip_bytes(f, 4)

        rectypedesc = _read_typedesc(f)

        if rectypedesc['typecode'] == 0:

            # A type code of 0 is only accepted when the record ends here.
            if nextrec == f.tell():
                record['data'] = None  # Indicates NULL value
            else:
                raise ValueError("Unexpected type code: 0")

        else:

            varstart = _read_long(f)
            if varstart != 7:
                raise Exception("VARSTART is not 7")

            if rectypedesc['structure']:
                record['data'] = _read_structure(f, rectypedesc['array_desc'],
                                                 rectypedesc['struct_desc'])
            elif rectypedesc['array']:
                record['data'] = _read_array(f, rectypedesc['typecode'],
                                             rectypedesc['array_desc'])
            else:
                record['data'] = _read_data(f, rectypedesc['typecode'])

    elif rectype == "TIMESTAMP":

        _skip_bytes(f, 4*256)
        record['date'] = _read_string(f)
        record['user'] = _read_string(f)
        record['host'] = _read_string(f)

    elif rectype == "VERSION":

        record['format'] = _read_long(f)
        record['arch'] = _read_string(f)
        record['os'] = _read_string(f)
        record['release'] = _read_string(f)

    # Must match the spelling used in RECTYPE_DICT, otherwise these records
    # fall through to the "not implemented" error below.
    elif rectype == "IDENTIFICATION":

        record['author'] = _read_string(f)
        record['title'] = _read_string(f)
        record['idcode'] = _read_string(f)

    elif rectype == "NOTICE":

        record['notice'] = _read_string(f)

    elif rectype == "DESCRIPTION":

        record['description'] = _read_string_data(f)

    elif rectype == "HEAP_HEADER":

        record['nvalues'] = _read_long(f)
        record['indices'] = [_read_long(f)
                             for _ in range(record['nvalues'])]

    # RECTYPE_DICT names record type 1 "COMMON_VARIABLE"; the historical
    # "COMMONBLOCK" spelling is still accepted.
    elif rectype in ("COMMON_VARIABLE", "COMMONBLOCK"):

        record['nvars'] = _read_long(f)
        record['name'] = _read_string(f)
        record['varnames'] = [_read_string(f)
                              for _ in range(record['nvars'])]

    elif rectype == "END_MARKER":

        record['end'] = True

    elif rectype == "UNKNOWN":

        warnings.warn("Skipping UNKNOWN record")

    elif rectype == "SYSTEM_VARIABLE":

        warnings.warn("Skipping SYSTEM_VARIABLE record")

    else:

        raise Exception("record['rectype']=%s not implemented" %
                        record['rectype'])

    # Always resynchronise on the recorded start of the next record.
    f.seek(nextrec)

    return record

def _read_typedesc(f):
    '''
    Read a type descriptor and return it as a dictionary.

    The result has the keys 'typecode', 'varflags', 'array' and
    'structure', plus 'array_desc' (arrays and structures) and
    'struct_desc' (structures only).  Raises Exception when bit 2 of the
    flags is set.
    '''
    typecode = _read_long(f)
    flags = _read_long(f)
    typedesc = {'typecode': typecode, 'varflags': flags}

    if (flags & 2) == 2:
        raise Exception("System variables not implemented")

    is_array = (flags & 4) == 4
    is_structure = (flags & 32) == 32
    typedesc['array'] = is_array
    typedesc['structure'] = is_structure

    # A structure carries an array descriptor followed by a structure
    # descriptor; a plain array carries only the array descriptor.
    if is_structure or is_array:
        typedesc['array_desc'] = _read_arraydesc(f)
        if is_structure:
            typedesc['struct_desc'] = _read_structdesc(f)

    return typedesc

def _read_arraydesc(f):
    '''
    Read an array descriptor and return it as a dictionary with the keys
    'arrstart', 'nbytes', 'nelements', 'ndims', 'nmax' and 'dims'.

    Raises Exception for an unknown ARRSTART value, or when a 64-bit
    descriptor has a non-zero word in front of a dimension.
    '''
    arrstart = _read_long(f)
    arraydesc = {'arrstart': arrstart}

    if arrstart == 8:

        # Descriptor with 32-bit sizes.
        _skip_bytes(f, 4)
        arraydesc['nbytes'] = _read_long(f)
        arraydesc['nelements'] = _read_long(f)
        arraydesc['ndims'] = _read_long(f)
        _skip_bytes(f, 8)
        nmax = _read_long(f)
        arraydesc['nmax'] = nmax
        arraydesc['dims'] = [_read_long(f) for _ in range(nmax)]

    elif arrstart == 18:

        # Descriptor with 64-bit sizes.
        warnings.warn("Using experimental 64-bit array read")

        _skip_bytes(f, 8)
        arraydesc['nbytes'] = _read_uint64(f)
        arraydesc['nelements'] = _read_uint64(f)
        arraydesc['ndims'] = _read_long(f)
        _skip_bytes(f, 8)
        arraydesc['nmax'] = 8

        sizes = []
        arraydesc['dims'] = sizes
        for _ in range(arraydesc['nmax']):
            # Each dimension is preceded by a word that must be zero.
            if _read_long(f) != 0:
                raise Exception("Expected a zero in ARRAY_DESC")
            sizes.append(_read_long(f))

    else:

        raise Exception("Unknown ARRSTART: %i" % arrstart)

    return arraydesc

def _read_structdesc(f):
    '''
    Read a structure descriptor and return it as a dictionary.

    A newly defined structure is registered in STRUCT_DICT under its name.
    A descriptor flagged as predefined returns the entry already stored in
    STRUCT_DICT instead.  Raises Exception when STRUCTSTART is not 9, or
    when a predefined structure has no stored definition.
    '''
    structdesc = {}

    if _read_long(f) != 9:
        raise Exception("STRUCTSTART should be 9")

    name = _read_string(f)
    structdesc['name'] = name
    predef = _read_long(f)
    ntags = _read_long(f)
    structdesc['ntags'] = ntags
    structdesc['nbytes'] = _read_long(f)

    # The PREDEF word is a set of bit flags.
    structdesc['predef'] = predef & 1
    structdesc['inherits'] = predef & 2
    structdesc['is_super'] = predef & 4

    if structdesc['predef']:
        # Only a reference: reuse the definition recorded earlier.
        if name not in STRUCT_DICT:
            raise Exception("PREDEF=1 but can't find definition")
        return STRUCT_DICT[name]

    # Tag descriptors come first, then all of the tag names.
    tags = [_read_tagdesc(f) for _ in range(ntags)]
    structdesc['tagtable'] = tags
    for tag in tags:
        tag['name'] = _read_string(f)

    # Array descriptors for the array tags, in tag order.
    arrays = {}
    structdesc['arrtable'] = arrays
    for tag in tags:
        if tag['array']:
            arrays[tag['name']] = _read_arraydesc(f)

    # Nested structure descriptors for the structure tags, in tag order.
    nested = {}
    structdesc['structtable'] = nested
    for tag in tags:
        if tag['structure']:
            nested[tag['name']] = _read_structdesc(f)

    if structdesc['inherits'] or structdesc['is_super']:
        structdesc['classname'] = _read_string(f)
        nsup = _read_long(f)
        structdesc['nsupclasses'] = nsup
        structdesc['supclassnames'] = [_read_string(f) for _ in range(nsup)]
        structdesc['supclasstable'] = [_read_structdesc(f)
                                       for _ in range(nsup)]

    STRUCT_DICT[name] = structdesc

    return structdesc

def _read_tagdesc(f):
    '''
    Read a tag descriptor and return it as a dictionary with the keys
    'offset', 'typecode', 'array', 'structure' and 'scalar'.
    '''
    offset = _read_long(f)
    if offset == -1:
        # An offset of -1 means the real offset follows as 64 bits.
        offset = _read_uint64(f)

    typecode = _read_long(f)
    flags = _read_long(f)

    return {
        'offset': offset,
        'typecode': typecode,
        'array': (flags & 4) == 4,
        'structure': (flags & 32) == 32,
        # Assume '10'x is scalar
        'scalar': typecode in DTYPE_DICT,
    }

def _replace_heap(variable, heap):
    '''
    Replace Pointer placeholders in `variable` by the values in `heap`.

    Parameters
    ----------
    variable : object
        A value read from the file: a Pointer, a record array, a record,
        an ndarray, or any other value.
    heap : dict
        Maps heap indices to the data of the corresponding heap records.

    Returns
    -------
    replace : bool
        True when `variable` itself was a Pointer, so the caller has to
        store `new` in its place.  Containers are updated in place and
        report False.
    new : object
        The dereferenced value, or `variable` itself.
    '''
    if isinstance(variable, Pointer):

        # Follow chains of pointers until a non-pointer value is reached.
        while isinstance(variable, Pointer):

            if variable.index == 0:
                # Index 0 is treated as a null pointer.
                variable = None
            elif variable.index in heap:
                variable = heap[variable.index]
            else:
                warnings.warn("Variable referenced by pointer not found "
                              "in heap: variable will be set to None")
                variable = None

        # The dereferenced value may itself contain pointers.
        replace, new = _replace_heap(variable, heap)

        if replace:
            variable = new

        return True, variable

    # np.recarray / np.record are the public names of the classes the
    # deprecated np.core.records module exposes.
    elif isinstance(variable, np.recarray):

        # Loop over records
        for ir, record in enumerate(variable):

            replace, new = _replace_heap(record, heap)

            if replace:
                variable[ir] = new

        return False, variable

    elif isinstance(variable, np.record):

        # Loop over values
        for iv, value in enumerate(variable):

            replace, new = _replace_heap(value, heap)

            if replace:
                variable[iv] = new

        return False, variable

    elif isinstance(variable, np.ndarray):

        # Loop over values if type is np.object_
        if variable.dtype.type is np.object_:

            # ndarray.itemset was removed in NumPy 2.0; assign through a
            # flattened view instead.
            flat = variable.reshape(-1)

            for iv in range(variable.size):

                replace, new = _replace_heap(variable.item(iv), heap)

                if replace:
                    flat[iv] = new

        return False, variable

    else:

        return False, variable

class AttrDict(dict):
    '''
    A case-insensitive dictionary with access via item, attribute, and call
    notations:

        >>> d = AttrDict()
        >>> d['Variable'] = 123
        >>> d['Variable']
        123
        >>> d.Variable
        123
        >>> d.variable
        123
        >>> d('VARIABLE')
        123

    Keys are stored in lower case.  Initial contents passed to the
    constructor are normalised the same way:

        >>> AttrDict({'Variable': 123})['variable']
        123
    '''

    def __init__(self, init=None):
        # Insert the initial items one by one so that they go through
        # __setitem__ and get lower-cased; dict.__init__(self, init) would
        # store the keys verbatim and make mixed-case keys unreachable.
        dict.__init__(self)
        if init is not None:
            for key, value in dict(init).items():
                self[key] = value

    def __getitem__(self, name):
        return super(AttrDict, self).__getitem__(name.lower())

    def __setitem__(self, key, value):
        return super(AttrDict, self).__setitem__(key.lower(), value)

    # Attribute and call access are aliases of item access.
    __getattr__ = __getitem__
    __setattr__ = __setitem__
    __call__ = __getitem__

def readsav(file_name, idict=None, python_dict=False,
            uncompressed_file_name=None, verbose=False):
    """
    Read an IDL .sav file.

    Parameters
    ----------
    file_name : str
        Name of the IDL save file.
    idict : dict, optional
        Dictionary in which to insert .sav file variables.
    python_dict : bool, optional
        By default, the object return is not a Python dictionary, but a
        case-insensitive dictionary with item, attribute, and call access
        to variables. To get a standard Python dictionary, set this option
        to True.
    uncompressed_file_name : str, optional
        This option only has an effect for .sav files written with the
        /compress option. If a file name is specified, compressed .sav
        files are uncompressed to this file. Otherwise, readsav will use
        the `tempfile` module to determine a temporary filename
        automatically, and will remove the temporary file upon successfully
        reading it in.
    verbose : bool, optional
        Whether to print out information about the save file, including
        the records read, and available variables.

    Returns
    -------
    idl_dict : AttrDict or dict
        If `python_dict` is set to False (default), this function returns a
        case-insensitive dictionary with item, attribute, and call access
        to variables. If `python_dict` is set to True, this function
        returns a Python dictionary with all variable names in lowercase.
        If `idict` was specified, then variables are written to the
        dictionary specified, and the updated dictionary is returned.

    Raises
    ------
    Exception
        If the file signature is not 'SR' or the record format is not
        recognised.
    """

    # Initialize record and variable holders
    records = []
    # Test against None so that an empty dictionary supplied by the caller
    # is still filled in and returned.
    if python_dict or idict is not None:
        variables = {}
    else:
        variables = AttrDict()

    # Open the IDL file; the finally clause closes whichever file `f`
    # refers to, also when reading fails.
    f = open(file_name, 'rb')
    try:

        # Read the signature, which should be 'SR'
        signature = _read_bytes(f, 2)
        if signature != b'SR':
            raise Exception("Invalid SIGNATURE: %s" % signature)

        # Next, the record format, which is '\x00\x04' for normal .sav
        # files, and '\x00\x06' for compressed .sav files.
        recfmt = _read_bytes(f, 2)

        if recfmt == b'\x00\x04':
            pass

        elif recfmt == b'\x00\x06':

            if verbose:
                print("IDL Save file is compressed")

            fout = _expand_compressed_sav(f, uncompressed_file_name, verbose)

            # Close the original compressed file
            f.close()

            # Set f to be the decompressed file, and skip the first four
            # bytes
            f = fout
            f.seek(4)

        else:
            raise Exception("Invalid RECFMT: %s" % recfmt)

        # Loop through records, and add them to the list
        while True:
            r = _read_record(f)
            records.append(r)
            if 'end' in r:
                if r['end']:
                    break

    finally:
        # Close the file
        f.close()

    # Find heap data variables
    heap = {}
    for r in records:
        if r['rectype'] == "HEAP_DATA":
            heap[r['heap_index']] = r['data']

    # Find all variables
    for r in records:
        if r['rectype'] == "VARIABLE":
            replace, new = _replace_heap(r['data'], heap)
            if replace:
                r['data'] = new
            variables[r['varname'].lower()] = r['data']

    if verbose:
        _print_sav_summary(records, variables)

    if idict is not None:
        for var in variables:
            idict[var] = variables[var]
        return idict
    else:
        return variables


def _expand_compressed_sav(f, uncompressed_file_name, verbose):
    """Decompress the records of a compressed .sav file into a new file.

    `f` is the compressed file, positioned just after its four-byte header.
    The output goes to `uncompressed_file_name` when given, otherwise to a
    named temporary file.  The open output file is returned and must be
    closed by the caller; if an error occurs it is closed here before the
    exception propagates.
    """
    if uncompressed_file_name:
        fout = open(uncompressed_file_name, 'w+b')
    else:
        fout = tempfile.NamedTemporaryFile(suffix='.sav')

    try:
        if verbose:
            print(" -> expanding to %s" % fout.name)

        # Write header
        fout.write(b'SR\x00\x04')

        # Cycle through records
        while True:

            # Read record type
            rectype = _read_long(f)
            fout.write(struct.pack('>l', int(rectype)))

            # Read position of next record (low word, then high word) as a
            # Python int; multiplying a NumPy uint32 by 2**32 overflows
            # under NumPy 2 promotion rules.
            nextrec = int(_read_uint32(f))
            nextrec += int(_read_uint32(f)) * 2**32

            # Read the unknown 4 bytes
            unknown = f.read(4)

            # Check if the end of the file has been reached
            if RECTYPE_DICT[rectype] == 'END_MARKER':
                fout.write(struct.pack('>I', nextrec % 2**32))
                fout.write(struct.pack('>I', nextrec // 2**32))
                fout.write(unknown)
                break

            # Find current position
            pos = f.tell()

            # Decompress record
            rec_string = zlib.decompress(f.read(nextrec - pos))

            # Find new position of next record: the 12 bytes still to be
            # written for this header, plus the expanded payload.
            nextrec = fout.tell() + len(rec_string) + 12

            # Write out record
            fout.write(struct.pack('>I', nextrec % 2**32))
            fout.write(struct.pack('>I', nextrec // 2**32))
            fout.write(unknown)
            fout.write(rec_string)

    except Exception:
        fout.close()
        raise

    return fout


def _print_sav_summary(records, variables):
    """Print the verbose report on the records and variables that were read."""
    separator = "-" * 50

    # Print out timestamp info about the file
    for record in records:
        if record['rectype'] == "TIMESTAMP":
            print(separator)
            print("Date: %s" % record['date'])
            print("User: %s" % record['user'])
            print("Host: %s" % record['host'])
            break

    # Print out version info about the file
    for record in records:
        if record['rectype'] == "VERSION":
            print(separator)
            print("Format: %s" % record['format'])
            print("Architecture: %s" % record['arch'])
            print("Operating System: %s" % record['os'])
            print("IDL Version: %s" % record['release'])
            break

    # Print out identification info about the file (spelling must match
    # the name used in RECTYPE_DICT)
    for record in records:
        if record['rectype'] == "IDENTIFICATION":
            print(separator)
            print("Author: %s" % record['author'])
            print("Title: %s" % record['title'])
            print("ID Code: %s" % record['idcode'])
            break

    # Print out descriptions saved with the file
    for record in records:
        if record['rectype'] == "DESCRIPTION":
            print(separator)
            print("Description: %s" % record['description'])
            break

    print(separator)
    print("Successfully read %i records of which:" %
          (len(records)))

    # Create convenience list of record types
    rectypes = [r['rectype'] for r in records]

    for rt in set(rectypes):
        if rt != 'END_MARKER':
            print(" - %i are of type %s" % (rectypes.count(rt), rt))
    print(separator)

    if 'VARIABLE' in rectypes:
        print("Available variables:")
        for var in variables:
            print(" - %s [%s]" % (var, type(variables[var])))
        print(separator)

Coverage for /usr/lib/python3/dist-packages/scipy/io/idl.py : 11%

433 statements 46 run 387 missing 15 excluded