Source code for pyrocko.io.tdms_idas

import logging
import os.path as op
import struct
import datetime
import mmap
import numpy as num

from pyrocko import trace

logger = logging.getLogger(__name__)


def write_property_dict(prop_dict, out_file):
    from pprint import pformat

    f = open(out_file, 'w')
    f.write('tdms_property_map=')
    f.write(pformat(prop_dict))
    f.close()


[docs]def type_not_supported(vargin):
    '''Function raises a NotImplementedException.'''
    raise NotImplementedError('Reading of this tdsDataType is not implemented')


[docs]def parse_time_stamp(fractions, seconds):
    '''
    Convert time TDMS time representation to datetime
    fractions   -- fractional seconds (2^-64)
    seconds     -- The number of seconds since 1/1/1904
    @rtype : datetime.datetime
    '''
    if (
        fractions is not None
        and seconds is not None
        and fractions + seconds > 0
    ):
        return datetime.timedelta(
            0, fractions * 2 ** -64 + seconds
        ) + datetime.datetime(1904, 1, 1)
    else:
        return None


# Enum mapping TDM data types to description string, numpy type where exists
# See Ref[2] for enum values
TDS_DATA_TYPE = dict(
    {
        0x00: 'void',  # tdsTypeVoid
        0x01: 'int8',  # tdsTypeI8
        0x02: 'int16',  # tdsTypeI16
        0x03: 'int32',  # tdsTypeI32
        0x04: 'int64',  # tdsTypeI64
        0x05: 'uint8',  # tdsTypeU8
        0x06: 'uint16',  # tdsTypeU16
        0x07: 'uint32',  # tdsTypeU32
        0x08: 'uint64',  # tdsTypeU64
        0x09: 'float32',  # tdsTypeSingleFloat
        0x0A: 'float64',  # tdsTypeDoubleFloat
        0x0B: 'float128',  # tdsTypeExtendedFloat
        0x19: 'singleFloatWithUnit',  # tdsTypeSingleFloatWithUnit
        0x1A: 'doubleFloatWithUnit',  # tdsTypeDoubleFloatWithUnit
        0x1B: 'extendedFloatWithUnit',  # tdsTypeExtendedFloatWithUnit
        0x20: 'str',  # tdsTypeString
        0x21: 'bool',  # tdsTypeBoolean
        0x44: 'datetime',  # tdsTypeTimeStamp
        0xFFFFFFFF: 'raw',  # tdsTypeDAQmxRawData
    }
)

# Function mapping for reading TDMS data types
TDS_READ_VAL = dict(
    {
        'void': lambda f: None,  # tdsTypeVoid
        'int8': lambda f: struct.unpack('<b', f.read(1))[0],
        'int16': lambda f: struct.unpack('<h', f.read(2))[0],
        'int32': lambda f: struct.unpack('<i', f.read(4))[0],
        'int64': lambda f: struct.unpack('<q', f.read(8))[0],
        'uint8': lambda f: struct.unpack('<B', f.read(1))[0],
        'uint16': lambda f: struct.unpack('<H', f.read(2))[0],
        'uint32': lambda f: struct.unpack('<I', f.read(4))[0],
        'uint64': lambda f: struct.unpack('<Q', f.read(8))[0],
        'float32': lambda f: struct.unpack('<f', f.read(4))[0],
        'float64': lambda f: struct.unpack('<d', f.read(8))[0],
        'float128': type_not_supported,
        'singleFloatWithUnit': type_not_supported,
        'doubleFloatWithUnit': type_not_supported,
        'extendedFloatWithUnit': type_not_supported,
        'str': lambda f: f.read(struct.unpack('<i', f.read(4))[0]),
        'bool': lambda f: struct.unpack('<?', f.read(1))[0],
        'datetime': lambda f: parse_time_stamp(
            struct.unpack('<Q', f.read(8))[0],
            struct.unpack('<q', f.read(8))[0],
        ),
        'raw': type_not_supported,
    }
)

DECIMATE_MASK = 0b00100000
LEAD_IN_LENGTH = 28
FILEINFO_NAMES = (
    'file_tag',
    'toc',
    'version',
    'next_segment_offset',
    'raw_data_offset',
)


[docs]class TdmsReader(object):
    '''A TDMS file reader object for reading properties and data'''

    def __init__(self, filename):
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = op.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        # TODO: Error if file not big enough to hold header
        self._tdms_file = open(filename, 'rb')
        # Read lead in (28 bytes):
        lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
        # lead_in is 28 bytes:
        # [string of length 4][int32][int32][int64][int64]
        fields = struct.unpack('<4siiQQ', lead_in)

        if fields[0].decode() not in 'TDSm':
            msg = 'Not a TDMS file (TDSm tag not found)'
            raise (TypeError, msg)

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        self.fileinfo['decimated'] = not bool(
            self.fileinfo['toc'] & DECIMATE_MASK
        )
        # Make offsets relative to beginning of file:
        self.fileinfo['next_segment_offset'] += LEAD_IN_LENGTH
        self.fileinfo['raw_data_offset'] += LEAD_IN_LENGTH
        self.fileinfo['file_size'] = op.getsize(self._tdms_file.name)

        # TODO: Validate lead in:
        if self.fileinfo['next_segment_offset'] > self.file_size:
            self.fileinfo['next_segment_offset'] = self.file_size
            # raise(ValueError, "Next Segment Offset too large in TDMS header")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._tdms_file.close()

    @property
    def channel_length(self):
        if self._properties is None:
            self.get_properties()

        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)
        nso = self.fileinfo['next_segment_offset']
        return num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize)

    @property
    def n_channels(self):
        if self._properties is None:
            self.get_properties()
        return self.fileinfo['n_channels']

[docs]    def get_properties(self, mapped=False):
        '''
        Return a dictionary of properties. Read from file only if necessary.
        '''
        # Check if already hold properties in memory
        if self._properties is None:
            self._properties = self._read_properties()
        return self._properties

    def _read_property(self):
        '''
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a list.
        '''
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack(
            '<{0}si'.format(var), self._tdms_file.read(var + 4)
        )
        # Lookup function to read and parse property value based on type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 32:
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        '''Read the properties from the file'''
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects
        self.fileinfo['n_channels'] = (
            struct.unpack('i', self._tdms_file.read(4))[0] - 2
        )
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]

        # loop through and read each property
        properties = [self._read_property() for _ in range(var)]
        properties = {prop: value for (prop, type, value) in properties}

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        # TODO: Add number of channels to properties
        return properties

    def _read_chunk_size(self):
        '''Read the data chunk size from the TDMS file header.'''
        if self._end_of_properties_offset is None:
            self._read_properties()

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # skip over Group Information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # skip over first channel path and length of index information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        self._data_type = TDS_DATA_TYPE.get(
            struct.unpack('<i', self._tdms_file.read(4))[0]
        )
        if self._data_type not in ('int16', 'float32'):
            raise Exception('Unsupported TDMS data type: ' + self._data_type)

        # Read Dimension of the raw data array (has to be 1):
        # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]

        self._chunk_size = struct.unpack('<i', self._tdms_file.read(4))[0]

[docs]    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        '''
        Get a block of data from the TDMS file.
        first_ch -- The first channel to load
        last_ch  -- The last channel to load
        first_s  -- The first sample to load
        last_s   -- The last sample to load
        '''
        if self._raw_data is None:
            self._initialise_data()
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= self.n_channels:
            last_ch = self.n_channels
        else:
            last_ch += 1
        if last_s is None or last_s > self._channel_length:
            last_s = self._channel_length
        else:
            last_s += 1
        nch = num.int64(max(last_ch - first_ch, 0))
        ns = num.int64(max(last_s - first_s, 0))

        # Allocate output container
        data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
        if data.size == 0:
            return data

        # 1. Index first block & reshape?
        first_blk = first_s // self._chunk_size
        last_blk = last_s // self._chunk_size
        last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
        nchunk = min(
            max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
        )
        first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
        last_s_1a = min(
            last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
        )
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)

        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d.shape = (self._chunk_size * nchunk, nch)
        d.reshape((self._chunk_size * nchunk, nch), order='F')
        data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        # 2. Index first additional samples
        first_s_1b = max(
            first_s - self._raw_data.shape[1] * self._chunk_size, 0
        )
        last_s_1b = min(
            last_s - self._raw_data.shape[1] * self._chunk_size,
            self._raw_last_chunk.shape[0],
        )
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        # data_1b = self._raw_last_chunk[first_s_1b:last_s_1b,first_ch:last_ch]
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[
                first_s_1b:last_s_1b, first_ch:last_ch
            ]

        # 3. Index second block
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            first_blk_2 = max(first_s_2 // self._chunk_size, 0)
            last_blk_2 = max(last_s_2 // self._chunk_size, 0)
            last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
            nchunk_2 = min(
                max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
            )
            first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
            last_s_2a = min(
                last_s_2 - first_blk_2 * self._chunk_size,
                nchunk_2 * self._chunk_size,
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            # data_2a = self._raw_data2[:, first_blk_2:last_full_blk_2,
            #             first_ch:last_ch]\
            #   .reshape((self._chunk_size*nchunk_2, nch), order='F')\
            #   [first_s_2a:last_s_2a, :]
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw_data2[
                    :, first_blk_2:last_full_blk_2, first_ch:last_ch
                ].reshape((self._chunk_size * nchunk_2, nch), order='F')[
                    first_s_2a:last_s_2a, :
                ]
        # 4. Index second additional samples
        if (
            first_s_2 > 0 or last_s_2 > 0
        ) and self._raw2_last_chunk is not None:
            first_s_2b = max(
                first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
            )
            last_s_2b = min(
                last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
                self._raw2_last_chunk.shape[0],
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            # data_2b = \
            #   self._raw2_last_chunk[first_s_2b:last_s_2b,first_ch:last_ch]
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[
                    first_s_2b:last_s_2b, first_ch:last_ch
                ]
        # 5. Concatenate blocks
        # data = num.concatenate((data_1a, data_1b, data_2a, data_2b))
        if data.size == 0:
            data = data.reshape(0, 0)
        return data

    def _initialise_data(self):
        '''Initialise the memory map for the data array.'''
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)

        # TODO: Support streaming file type?
        # TODO: Is this a valid calculation for ChannelLength?
        nso = self.fileinfo['next_segment_offset']
        self._seg1_length = num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize
        )
        self._channel_length = self._seg1_length

        if self.fileinfo['decimated']:
            n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
            ax_ord = 'C'
        else:
            n_complete_blk = 0
            ax_ord = 'F'
        self._raw_data = num.ndarray(
            (n_complete_blk, nch, self._chunk_size),
            dtype=self._data_type,
            buffer=dmap,
            offset=rdo,
        )
        # Rotate the axes to [chunk_size, nblk, nch]
        self._raw_data = num.rollaxis(self._raw_data, 2)
        additional_samples = num.int64(
            self._seg1_length - n_complete_blk * self._chunk_size
        )
        additional_samples_offset = (
            rdo
            + n_complete_blk
            * nch
            * self._chunk_size
            * num.dtype(self._data_type).itemsize
        )
        self._raw_last_chunk = num.ndarray(
            (nch, additional_samples),
            dtype=self._data_type,
            buffer=dmap,
            offset=additional_samples_offset,
            order=ax_ord,
        )
        # Rotate the axes to [samples, nch]
        self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)

        if self.file_size == nso:
            self._seg2_length = 0
        else:
            self._tdms_file.seek(nso + 12, 0)
            (seg2_nso, seg2_rdo) = struct.unpack(
                '<qq', self._tdms_file.read(2 * 8)
            )
            self._seg2_length = (
                (seg2_nso - seg2_rdo)
                / nch
                / num.dtype(self._data_type).itemsize
            )
            if self.fileinfo['decimated']:
                n_complete_blk2 = num.int64(
                    self._seg2_length / self._chunk_size)
            else:
                n_complete_blk2 = num.int64(0)
            self._raw_data2 = num.ndarray(
                (n_complete_blk2, nch, self._chunk_size),
                dtype=self._data_type,
                buffer=dmap,
                offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
            )
            self._raw_data2 = num.rollaxis(self._raw_data2, 2)
            additional_samples = num.int64(
                self._seg2_length - n_complete_blk2 * self._chunk_size
            )
            additional_samples_offset = (
                nso
                + LEAD_IN_LENGTH
                + seg2_rdo
                + n_complete_blk2
                * nch
                * self._chunk_size
                * num.dtype(self._data_type).itemsize
            )
            self._raw2_last_chunk = num.ndarray(
                (nch, additional_samples),
                dtype=self._data_type,
                buffer=dmap,
                offset=additional_samples_offset,
                order=ax_ord,
            )
            # Rotate the axes to [samples, nch]
            self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)

            if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
                pass
                # raise Exception('Second segment contains some data, \
                #                 not currently supported')
            self._channel_length = self._seg1_length + self._seg2_length
        # else:
        #     print "Not decimated"
        #     raise Exception('Reading file with decimated flag not set is not'
        #                     ' supported yet')


META_KEYS = {
    'measure_length': 'MeasureLength[m]',
    'start_position': 'StartPosition[m]',
    'spatial_resolution': 'SpatialResolution[m]',
    'fibre_index': 'FibreIndex',
    'unit_calibration': 'Unit Calibration (nm)',
    'start_distance': 'Start Distance (m)',
    'stop_distance': 'Stop Distance (m)',
    'normalization': 'Normalization',
    'decimation_filter': 'Decimation Filter',
    'gauge_length': 'GaugeLength',
    'norm_offset': 'Norm Offset',
    'source_mode': 'Source Mode',
    'time_decimation': 'Time Decimation',
    'zero_offset': 'Zero Offset (m)',
    'p_parameter': 'P',
    'p_coefficients': 'P Coefficients',
    'idas_version': 'iDASVersion',
    'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
    'receiver_gain': 'Receiver Gain',
    'continuous_mode': 'Continuous Mode',
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',

    'channel': None,
    'unit': None
}


def get_meta(tdms_properties):
    prop = tdms_properties

    deltat = 1. / prop['SamplingFrequency[Hz]']
    tmin = prop['GPSTimeStamp'].timestamp()

    fibre_meta = {key: prop.get(key_map, -1)
                  for key, key_map in META_KEYS.items()
                  if key_map is not None}

    coeff = fibre_meta['p_coefficients']
    try:
        coeff = tuple(map(float, coeff.split('\t')))
    except ValueError:
        coeff = tuple(map(float, coeff.split(';')))

    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))
    fibre_meta['receiver_gain'] = coeff

    fibre_meta['unit'] = 'radians'

    return deltat, tmin, fibre_meta


def iload(filename, load_data=True):
    tdms = TdmsReader(filename)
    deltat, tmin, meta = get_meta(tdms.get_properties())

    data = tdms.get_data().T.copy() if load_data else None

    for icha in range(tdms.n_channels):
        meta_cha = meta.copy()

        assert icha < 99999
        station = '%05i' % icha
        meta_cha['channel'] = icha

        nsamples = tdms.channel_length

        tr = trace.Trace(
            network='DA',
            station=station,
            ydata=None,
            deltat=deltat,
            tmin=tmin,
            tmax=tmin + (nsamples - 1) * deltat,
            meta=meta_cha)

        if data is not None:
            tr.set_ydata(data[icha])

        yield tr


def detect(first512):
    return first512.startswith(b'TDSm.')
Navigation

Source code for pyrocko.io.tdms_idas

Navigation