import logging
import os.path as op
import struct
import datetime
import mmap
import numpy as num
from pyrocko import trace
logger = logging.getLogger(__name__)
def write_property_dict(prop_dict, out_file):
    """Write a property dictionary to ``out_file`` for inspection."""
    from pprint import pformat

    with open(out_file, "w") as f:
        f.write("tdms_property_map=")
        f.write(pformat(prop_dict))
def type_not_supported(vargin):
    """Raise NotImplementedError for unsupported TDMS data types."""
    raise NotImplementedError("Reading of this tdsDataType is not implemented")
def parse_time_stamp(fractions, seconds):
    """
    Convert a TDMS timestamp to a datetime.

    fractions -- fractional seconds (in units of 2^-64 s)
    seconds -- number of seconds since 1904-01-01
    @rtype : datetime.datetime
    """
if (
fractions is not None
and seconds is not None
and fractions + seconds > 0
):
return datetime.timedelta(
0, fractions * 2 ** -64 + seconds
) + datetime.datetime(1904, 1, 1)
else:
return None
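# Worked example for the conversion above (illustrative values): with zero
# fractions, 3600000000 seconds after the 1904-01-01 epoch gives
#
#     parse_time_stamp(0, 3600000000)
#     # -> datetime.datetime(2018, 1, 28, 16, 0)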
# Mapping of TDMS data type enum values to description strings
# (numpy dtype names where they exist). See Ref[2] for the enum values.
TDS_DATA_TYPE = dict(
{
0x00: "void", # tdsTypeVoid
0x01: "int8", # tdsTypeI8
0x02: "int16", # tdsTypeI16
0x03: "int32", # tdsTypeI32
0x04: "int64", # tdsTypeI64
0x05: "uint8", # tdsTypeU8
0x06: "uint16", # tdsTypeU16
0x07: "uint32", # tdsTypeU32
0x08: "uint64", # tdsTypeU64
0x09: "float32", # tdsTypeSingleFloat
0x0A: "float64", # tdsTypeDoubleFloat
0x0B: "float128", # tdsTypeExtendedFloat
0x19: "singleFloatWithUnit", # tdsTypeSingleFloatWithUnit
0x1A: "doubleFloatWithUnit", # tdsTypeDoubleFloatWithUnit
0x1B: "extendedFloatWithUnit", # tdsTypeExtendedFloatWithUnit
0x20: "str", # tdsTypeString
0x21: "bool", # tdsTypeBoolean
0x44: "datetime", # tdsTypeTimeStamp
0xFFFFFFFF: "raw", # tdsTypeDAQmxRawData
}
)
# Function mapping for reading TDMS data types
TDS_READ_VAL = dict(
{
"void": lambda f: None, # tdsTypeVoid
"int8": lambda f: struct.unpack("<b", f.read(1))[0],
"int16": lambda f: struct.unpack("<h", f.read(2))[0],
"int32": lambda f: struct.unpack("<i", f.read(4))[0],
"int64": lambda f: struct.unpack("<q", f.read(8))[0],
"uint8": lambda f: struct.unpack("<B", f.read(1))[0],
"uint16": lambda f: struct.unpack("<H", f.read(2))[0],
"uint32": lambda f: struct.unpack("<I", f.read(4))[0],
"uint64": lambda f: struct.unpack("<Q", f.read(8))[0],
"float32": lambda f: struct.unpack("<f", f.read(4))[0],
"float64": lambda f: struct.unpack("<d", f.read(8))[0],
"float128": type_not_supported,
"singleFloatWithUnit": type_not_supported,
"doubleFloatWithUnit": type_not_supported,
"extendedFloatWithUnit": type_not_supported,
"str": lambda f: f.read(struct.unpack("<i", f.read(4))[0]),
"bool": lambda f: struct.unpack("<?", f.read(1))[0],
"datetime": lambda f: parse_time_stamp(
struct.unpack("<Q", f.read(8))[0],
struct.unpack("<q", f.read(8))[0],
),
"raw": type_not_supported,
}
)
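# Minimal self-check of the reader mapping above, using an in-memory buffer
# in place of a real TDMS file (the packed values are made up):
#
#     import io
#     buf = io.BytesIO(struct.pack('<i', 42) + struct.pack('<f', 1.5))
#     assert TDS_READ_VAL['int32'](buf) == 42
#     assert TDS_READ_VAL['float32'](buf) == 1.5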
DECIMATE_MASK = 0b00100000
LEAD_IN_LENGTH = 28
FILEINFO_NAMES = (
"file_tag",
"toc",
"version",
"next_segment_offset",
"raw_data_offset",
)
class TdmsReader(object):
"""A TDMS file reader object for reading properties and data"""
def __init__(self, filename):
self._properties = None
self._end_of_properties_offset = None
self._data_type = None
self._chunk_size = None
self._raw_data = None
self._raw_data2 = None # The mapped data in the 'Next Segment'
self._raw_last_chunk = None
self._raw2_last_chunk = None
self.file_size = op.getsize(filename)
self._channel_length = None
self._seg1_length = None
self._seg2_length = None
# TODO: Error if file not big enough to hold header
self._tdms_file = open(filename, "rb")
# Read lead in (28 bytes):
lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
# lead_in is 28 bytes:
# [string of length 4][int32][int32][int64][int64]
fields = struct.unpack("<4siiQQ", lead_in)
        if fields[0].decode() != "TDSm":
            msg = "Not a TDMS file (TDSm tag not found)"
            raise TypeError(msg)
self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
self.fileinfo["decimated"] = not bool(
self.fileinfo["toc"] & DECIMATE_MASK
)
# Make offsets relative to beginning of file:
self.fileinfo["next_segment_offset"] += LEAD_IN_LENGTH
self.fileinfo["raw_data_offset"] += LEAD_IN_LENGTH
self.fileinfo["file_size"] = op.getsize(self._tdms_file.name)
# TODO: Validate lead in:
if self.fileinfo["next_segment_offset"] > self.file_size:
self.fileinfo["next_segment_offset"] = self.file_size
# raise(ValueError, "Next Segment Offset too large in TDMS header")
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self._tdms_file.close()
@property
def channel_length(self):
if self._properties is None:
self.get_properties()
rdo = num.int64(self.fileinfo["raw_data_offset"])
nch = num.int64(self.n_channels)
nso = self.fileinfo["next_segment_offset"]
return num.int64(
(nso - rdo) / nch / num.dtype(self._data_type).itemsize)
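    # Worked example for the division above (illustrative numbers): a
    # segment holding 4000000 bytes of raw int16 data (itemsize 2) for
    # 1000 channels yields 4000000 / 1000 / 2 = 2000 samples per channel.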
@property
def n_channels(self):
if self._properties is None:
self.get_properties()
return self.fileinfo['n_channels']
    def get_properties(self, mapped=False):
"""
Return a dictionary of properties. Read from file only if necessary.
"""
        # Use cached properties if we already read them from the file:
if self._properties is None:
self._properties = self._read_properties()
return self._properties
def _read_property(self):
"""
Read a single property from the TDMS file.
        Return the name, type and value of the property as a tuple.
"""
# Read length of object path:
var = struct.unpack("<i", self._tdms_file.read(4))[0]
# Read property name and type:
name, data_type = struct.unpack(
"<{0}si".format(var), self._tdms_file.read(var + 4)
)
# Lookup function to read and parse property value based on type:
value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
name = name.decode()
        if data_type == 0x20:  # tdsTypeString
            value = value.decode()
return name, data_type, value
def _read_properties(self):
"""Read the properties from the file"""
self._tdms_file.seek(LEAD_IN_LENGTH, 0)
# Number of channels is total objects - file objects - group objects
self.fileinfo["n_channels"] = (
struct.unpack("i", self._tdms_file.read(4))[0] - 2
)
# Read length of object path:
var = struct.unpack("<i", self._tdms_file.read(4))[0]
# skip over object path and raw data index:
self._tdms_file.seek(var + 4, 1)
# Read number of properties in this group:
var = struct.unpack("<i", self._tdms_file.read(4))[0]
# loop through and read each property
properties = [self._read_property() for _ in range(var)]
        properties = {prop: value for (prop, _type, value) in properties}
self._end_of_properties_offset = self._tdms_file.tell()
self._read_chunk_size()
# TODO: Add number of channels to properties
return properties
def _read_chunk_size(self):
"""Read the data chunk size from the TDMS file header."""
if self._end_of_properties_offset is None:
self._read_properties()
self._tdms_file.seek(self._end_of_properties_offset, 0)
# skip over Group Information:
var = struct.unpack("<i", self._tdms_file.read(4))[0]
self._tdms_file.seek(var + 8, 1)
# skip over first channel path and length of index information:
var = struct.unpack("<i", self._tdms_file.read(4))[0]
self._tdms_file.seek(var + 4, 1)
self._data_type = TDS_DATA_TYPE.get(
struct.unpack("<i", self._tdms_file.read(4))[0]
)
if self._data_type not in ("int16", "float32"):
raise Exception("Unsupported TDMS data type: " + self._data_type)
# Read Dimension of the raw data array (has to be 1):
# dummy = struct.unpack("<i", self._tdms_file.read(4))[0]
self._chunk_size = struct.unpack("<i", self._tdms_file.read(4))[0]
    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
"""
Get a block of data from the TDMS file.
first_ch -- The first channel to load
last_ch -- The last channel to load
first_s -- The first sample to load
last_s -- The last sample to load
"""
if self._raw_data is None:
self._initialise_data()
if first_ch is None or first_ch < 0:
first_ch = 0
if last_ch is None or last_ch >= self.n_channels:
last_ch = self.n_channels
else:
last_ch += 1
if last_s is None or last_s > self._channel_length:
last_s = self._channel_length
else:
last_s += 1
nch = num.int64(max(last_ch - first_ch, 0))
ns = num.int64(max(last_s - first_s, 0))
# Allocate output container
data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
if data.size == 0:
return data
# 1. Index first block & reshape?
first_blk = first_s // self._chunk_size
last_blk = last_s // self._chunk_size
last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
nchunk = min(
max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
)
first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
last_s_1a = min(
last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
)
ind_s = 0
ind_e = ind_s + max(last_s_1a - first_s_1a, 0)
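        # Worked example of the chunk arithmetic above (illustrative
        # numbers): with chunk_size 1000, first_s 2500 and last_s 4200,
        # first_blk = 2, last_blk = 4, and first_s_1a = 500 is the offset
        # of first_s within its chunk.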
        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d.shape = (self._chunk_size * nchunk, nch)
data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]
# 2. Index first additional samples
first_s_1b = max(
first_s - self._raw_data.shape[1] * self._chunk_size, 0
)
last_s_1b = min(
last_s - self._raw_data.shape[1] * self._chunk_size,
self._raw_last_chunk.shape[0],
)
ind_s = ind_e
ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
# data_1b = self._raw_last_chunk[first_s_1b:last_s_1b,first_ch:last_ch]
if ind_e > ind_s:
data[ind_s:ind_e, :] = self._raw_last_chunk[
first_s_1b:last_s_1b, first_ch:last_ch
]
# 3. Index second block
first_s_2 = max(first_s - self._seg1_length, 0)
last_s_2 = last_s - self._seg1_length
if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
first_blk_2 = max(first_s_2 // self._chunk_size, 0)
last_blk_2 = max(last_s_2 // self._chunk_size, 0)
last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
nchunk_2 = min(
max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
)
first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
last_s_2a = min(
last_s_2 - first_blk_2 * self._chunk_size,
nchunk_2 * self._chunk_size,
)
ind_s = ind_e
ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
# data_2a = self._raw_data2[:, first_blk_2:last_full_blk_2,
# first_ch:last_ch]\
# .reshape((self._chunk_size*nchunk_2, nch), order='F')\
# [first_s_2a:last_s_2a, :]
if ind_e > ind_s:
data[ind_s:ind_e, :] = self._raw_data2[
:, first_blk_2:last_full_blk_2, first_ch:last_ch
].reshape((self._chunk_size * nchunk_2, nch), order="F")[
first_s_2a:last_s_2a, :
]
# 4. Index second additional samples
if (
first_s_2 > 0 or last_s_2 > 0
) and self._raw2_last_chunk is not None:
first_s_2b = max(
first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
)
last_s_2b = min(
last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
self._raw2_last_chunk.shape[0],
)
ind_s = ind_e
ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
# data_2b = \
# self._raw2_last_chunk[first_s_2b:last_s_2b,first_ch:last_ch]
if ind_e > ind_s:
data[ind_s:ind_e, :] = self._raw2_last_chunk[
first_s_2b:last_s_2b, first_ch:last_ch
]
# 5. Concatenate blocks
# data = num.concatenate((data_1a, data_1b, data_2a, data_2b))
if data.size == 0:
data = data.reshape(0, 0)
return data
def _initialise_data(self):
"""Initialise the memory map for the data array."""
if self._chunk_size is None:
self._read_chunk_size()
dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
rdo = num.int64(self.fileinfo["raw_data_offset"])
nch = num.int64(self.n_channels)
# TODO: Support streaming file type?
# TODO: Is this a valid calculation for ChannelLength?
nso = self.fileinfo["next_segment_offset"]
self._seg1_length = num.int64(
(nso - rdo) / nch / num.dtype(self._data_type).itemsize
)
self._channel_length = self._seg1_length
if self.fileinfo["decimated"]:
n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
ax_ord = "C"
else:
n_complete_blk = 0
ax_ord = "F"
self._raw_data = num.ndarray(
(n_complete_blk, nch, self._chunk_size),
dtype=self._data_type,
buffer=dmap,
offset=rdo,
)
# Rotate the axes to [chunk_size, nblk, nch]
self._raw_data = num.rollaxis(self._raw_data, 2)
additional_samples = num.int64(
self._seg1_length - n_complete_blk * self._chunk_size
)
additional_samples_offset = (
rdo
+ n_complete_blk
* nch
* self._chunk_size
* num.dtype(self._data_type).itemsize
)
self._raw_last_chunk = num.ndarray(
(nch, additional_samples),
dtype=self._data_type,
buffer=dmap,
offset=additional_samples_offset,
order=ax_ord,
)
# Rotate the axes to [samples, nch]
self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)
if self.file_size == nso:
self._seg2_length = 0
else:
self._tdms_file.seek(nso + 12, 0)
(seg2_nso, seg2_rdo) = struct.unpack(
"<qq", self._tdms_file.read(2 * 8)
)
self._seg2_length = (
(seg2_nso - seg2_rdo)
/ nch
/ num.dtype(self._data_type).itemsize
)
if self.fileinfo["decimated"]:
n_complete_blk2 = num.int64(
self._seg2_length / self._chunk_size)
else:
n_complete_blk2 = num.int64(0)
self._raw_data2 = num.ndarray(
(n_complete_blk2, nch, self._chunk_size),
dtype=self._data_type,
buffer=dmap,
offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
)
self._raw_data2 = num.rollaxis(self._raw_data2, 2)
additional_samples = num.int64(
self._seg2_length - n_complete_blk2 * self._chunk_size
)
additional_samples_offset = (
nso
+ LEAD_IN_LENGTH
+ seg2_rdo
+ n_complete_blk2
* nch
* self._chunk_size
* num.dtype(self._data_type).itemsize
)
self._raw2_last_chunk = num.ndarray(
(nch, additional_samples),
dtype=self._data_type,
buffer=dmap,
offset=additional_samples_offset,
order=ax_ord,
)
# Rotate the axes to [samples, nch]
self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)
if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
pass
# raise Exception('Second segment contains some data, \
# not currently supported')
self._channel_length = self._seg1_length + self._seg2_length
# else:
# print "Not decimated"
# raise Exception('Reading file with decimated flag not set is not'
# ' supported yet')
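# Layout sketch for _initialise_data above: in 'decimated' files the raw
# segment is a run of complete blocks, each holding chunk_size samples for
# every channel. Mapping the segment as (n_blocks, nch, chunk_size) and
# rolling the last axis to the front gives (chunk_size, n_blocks, nch),
# which is the shape get_data indexes. A tiny synthetic illustration:
#
#     a = num.arange(2 * 3 * 4).reshape(2, 3, 4)  # (nblk, nch, chunk)
#     b = num.rollaxis(a, 2)                      # (chunk, nblk, nch)
#     assert b.shape == (4, 2, 3)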
META_KEYS = {
'measure_length': 'MeasureLength[m]',
'start_position': 'StartPosition[m]',
'spatial_resolution': 'SpatialResolution[m]',
'fibre_index': 'FibreIndex',
'unit_calibration': 'Unit Calibration (nm)',
'start_distance': 'Start Distance (m)',
'stop_distance': 'Stop Distance (m)',
'normalization': 'Normalization',
'decimation_filter': 'Decimation Filter',
'gauge_length': 'GaugeLength',
'norm_offset': 'Norm Offset',
'source_mode': 'Source Mode',
'time_decimation': 'Time Decimation',
'zero_offset': 'Zero Offset (m)',
'p_parameter': 'P',
'p_coefficients': 'P Coefficients',
'idas_version': 'iDASVersion',
    'precise_sampling_freq': 'Precise Sampling Frequency (Hz)',
'receiver_gain': 'Receiver Gain',
'continuous_mode': 'Continuous Mode',
    # Note: 'SystemInfomation' spelling matches the property names as
    # written by the instrument software.
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',
'channel': None,
'unit': None
}
def get_meta(tdms_properties):
    """Return (deltat, tmin, fibre_meta) extracted from TDMS properties."""
    prop = tdms_properties
    deltat = 1. / prop['SamplingFrequency[Hz]']
tmin = prop['GPSTimeStamp'].timestamp()
fibre_meta = {key: prop.get(key_map, -1)
for key, key_map in META_KEYS.items()
if key_map is not None}
coeff = fibre_meta['p_coefficients']
try:
coeff = tuple(map(float, coeff.split('\t')))
except ValueError:
coeff = tuple(map(float, coeff.split(';')))
    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))

    fibre_meta['p_coefficients'] = coeff
    fibre_meta['receiver_gain'] = gain
fibre_meta['unit'] = 'radians'
return deltat, tmin, fibre_meta
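# Usage sketch for get_meta (property names as they appear in iDAS files;
# the values are made up and the dict abbreviated -- keys from META_KEYS
# that are missing default to -1):
#
#     props = {
#         'SamplingFrequency[Hz]': 1000.0,
#         'GPSTimeStamp': datetime.datetime(2020, 1, 1),
#         'P Coefficients': '1.0\t2.0\t3.0',
#         'Receiver Gain': '10.0\t10.0',
#         ...
#     }
#     deltat, tmin, meta = get_meta(props)   # deltat == 0.001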
def iload(filename, load_data=True):
    """Yield one pyrocko Trace per fibre channel of a TDMS file."""
    tdms = TdmsReader(filename)
deltat, tmin, meta = get_meta(tdms.get_properties())
data = tdms.get_data().T.copy() if load_data else None
for icha in range(tdms.n_channels):
meta_cha = meta.copy()
assert icha < 99999
station = '%05i' % icha
meta_cha['channel'] = icha
nsamples = tdms.channel_length
tr = trace.Trace(
network='DA',
station=station,
ydata=None,
deltat=deltat,
tmin=tmin,
tmax=tmin + (nsamples - 1) * deltat,
meta=meta_cha)
if data is not None:
tr.set_ydata(data[icha])
yield tr
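# Usage sketch (hypothetical path): iload yields one pyrocko Trace per
# fibre channel, with station codes derived from the channel index:
#
#     for tr in iload('recording.tdms'):
#         print(tr.station, tr.deltat, tr.meta['channel'])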
def detect(first512):
    return first512.startswith(b'TDSm')