1import logging
2import os.path as op
3import struct
4import datetime
5import mmap
6import numpy as num
8from pyrocko import trace
10logger = logging.getLogger(__name__)
def write_property_dict(prop_dict, out_file):
    """Write *prop_dict* to *out_file* as an importable Python assignment.

    The resulting file contains a single statement of the form
    ``tdms_property_map=<pretty-printed dict>``.
    """
    from pprint import pformat

    # Context manager guarantees the handle is closed even if pformat or
    # write raises (original used bare open/close without try/finally).
    with open(out_file, "w") as f:
        f.write("tdms_property_map=")
        f.write(pformat(prop_dict))
def type_not_supported(vargin):
    """Placeholder reader: raise NotImplementedError for TDMS data types
    that this module cannot parse."""
    message = "Reading of this tdsDataType is not implemented"
    raise NotImplementedError(message)
def parse_time_stamp(fractions, seconds):
    """
    Convert time TDMS time representation to datetime
    fractions -- fractional seconds (2^-64)
    seconds -- The number of seconds since 1/1/1904
    @rtype : datetime.datetime

    Returns None when either component is missing or the combined
    timestamp is not positive.
    """
    # Guard clauses instead of one compound condition.
    if fractions is None or seconds is None:
        return None
    if fractions + seconds <= 0:
        return None

    tdms_epoch = datetime.datetime(1904, 1, 1)
    elapsed = datetime.timedelta(0, fractions * 2 ** -64 + seconds)
    return tdms_epoch + elapsed
# Enum mapping TDM data types to description string, numpy type where exists
# See Ref[2] for enum values
# (The redundant dict() wrapper around the literal was removed.)
TDS_DATA_TYPE = {
    0x00: "void",  # tdsTypeVoid
    0x01: "int8",  # tdsTypeI8
    0x02: "int16",  # tdsTypeI16
    0x03: "int32",  # tdsTypeI32
    0x04: "int64",  # tdsTypeI64
    0x05: "uint8",  # tdsTypeU8
    0x06: "uint16",  # tdsTypeU16
    0x07: "uint32",  # tdsTypeU32
    0x08: "uint64",  # tdsTypeU64
    0x09: "float32",  # tdsTypeSingleFloat
    0x0A: "float64",  # tdsTypeDoubleFloat
    0x0B: "float128",  # tdsTypeExtendedFloat
    0x19: "singleFloatWithUnit",  # tdsTypeSingleFloatWithUnit
    0x1A: "doubleFloatWithUnit",  # tdsTypeDoubleFloatWithUnit
    0x1B: "extendedFloatWithUnit",  # tdsTypeExtendedFloatWithUnit
    0x20: "str",  # tdsTypeString
    0x21: "bool",  # tdsTypeBoolean
    0x44: "datetime",  # tdsTypeTimeStamp
    0xFFFFFFFF: "raw",  # tdsTypeDAQmxRawData
}
# Function mapping for reading TDMS data types.
# Each reader takes the open binary file object, consumes the value's bytes
# and returns the parsed value. All TDMS scalars are little-endian ('<').
# (The redundant dict() wrapper around the literal was removed.)
TDS_READ_VAL = {
    "void": lambda f: None,  # tdsTypeVoid
    "int8": lambda f: struct.unpack("<b", f.read(1))[0],
    "int16": lambda f: struct.unpack("<h", f.read(2))[0],
    "int32": lambda f: struct.unpack("<i", f.read(4))[0],
    "int64": lambda f: struct.unpack("<q", f.read(8))[0],
    "uint8": lambda f: struct.unpack("<B", f.read(1))[0],
    "uint16": lambda f: struct.unpack("<H", f.read(2))[0],
    "uint32": lambda f: struct.unpack("<I", f.read(4))[0],
    "uint64": lambda f: struct.unpack("<Q", f.read(8))[0],
    "float32": lambda f: struct.unpack("<f", f.read(4))[0],
    "float64": lambda f: struct.unpack("<d", f.read(8))[0],
    "float128": type_not_supported,
    "singleFloatWithUnit": type_not_supported,
    "doubleFloatWithUnit": type_not_supported,
    "extendedFloatWithUnit": type_not_supported,
    # Strings are length-prefixed with an int32 byte count:
    "str": lambda f: f.read(struct.unpack("<i", f.read(4))[0]),
    "bool": lambda f: struct.unpack("<?", f.read(1))[0],
    # Timestamps are (uint64 fractional seconds, int64 seconds since 1904):
    "datetime": lambda f: parse_time_stamp(
        struct.unpack("<Q", f.read(8))[0],
        struct.unpack("<q", f.read(8))[0],
    ),
    "raw": type_not_supported,
}
# Bit 5 of the lead-in table-of-contents bitmask; used to derive the
# 'decimated' flag when parsing the header.
DECIMATE_MASK = 0b00100000
# The TDMS lead-in header is a fixed 28 bytes.
LEAD_IN_LENGTH = 28
# Keys for the unpacked lead-in fields, in on-disk order.
FILEINFO_NAMES = (
    "file_tag",
    "toc",
    "version",
    "next_segment_offset",
    "raw_data_offset",
)
class TdmsReader(object):
    """A TDMS file reader object for reading properties and data.

    The 28-byte lead-in is parsed on construction; properties, chunk
    layout and the memory-mapped raw data arrays are read lazily on
    first access.
    """

    def __init__(self, filename):
        """Open *filename* and parse the TDMS lead-in.

        Raises TypeError if the file does not start with the 'TDSm' tag.
        """
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = op.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        # TODO: Error if file not big enough to hold header
        self._tdms_file = open(filename, "rb")
        # Read lead in (28 bytes):
        lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
        # lead_in is 28 bytes:
        # [string of length 4][int32][int32][int64][int64]
        fields = struct.unpack("<4siiQQ", lead_in)

        if fields[0].decode() != "TDSm":
            # BUG FIX: was `raise (TypeError, msg)`, a Python 2 idiom that
            # in Python 3 raises a bare "exceptions must derive from
            # BaseException" TypeError instead of this message. Also the
            # substring test `not in "TDSm"` was tightened to `!=`
            # (equivalent for the fixed 4-byte tag, but explicit).
            raise TypeError("Not a TDMS file (TDSm tag not found)")

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        # NOTE(review): 'decimated' is True when the DECIMATE bit is NOT
        # set in the toc bitmask -- preserved as-is; verify semantics
        # against the writing software.
        self.fileinfo["decimated"] = not bool(
            self.fileinfo["toc"] & DECIMATE_MASK
        )
        # Make offsets relative to beginning of file:
        self.fileinfo["next_segment_offset"] += LEAD_IN_LENGTH
        self.fileinfo["raw_data_offset"] += LEAD_IN_LENGTH
        self.fileinfo["file_size"] = op.getsize(self._tdms_file.name)

        # TODO: Validate lead in:
        # Clamp a next_segment_offset pointing past EOF (e.g. interrupted
        # write) instead of raising.
        if self.fileinfo["next_segment_offset"] > self.file_size:
            self.fileinfo["next_segment_offset"] = self.file_size
            # raise(ValueError, "Next Segment Offset too large in TDMS header")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._tdms_file.close()

    @property
    def channel_length(self):
        """Number of samples per channel in the first segment."""
        if self._properties is None:
            self.get_properties()

        rdo = num.int64(self.fileinfo["raw_data_offset"])
        nch = num.int64(self.n_channels)
        nso = self.fileinfo["next_segment_offset"]
        return num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize)

    @property
    def n_channels(self):
        """Number of data channels in the file."""
        if self._properties is None:
            self.get_properties()
        return self.fileinfo['n_channels']

    def get_properties(self, mapped=False):
        """
        Return a dictionary of properties. Read from file only if necessary.

        The *mapped* parameter is currently unused; it is kept for
        interface compatibility.
        """
        # Check if already hold properties in memory
        if self._properties is None:
            self._properties = self._read_properties()
        return self._properties

    def _read_property(self):
        """
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a list.
        """
        # Read length of object path:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack(
            "<{0}si".format(var), self._tdms_file.read(var + 4)
        )
        # Lookup function to read and parse property value based on type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 32:  # 0x20 (tdsTypeString): value arrives as bytes
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        """Read the properties from the file header and return them as a
        name -> value dict. Also records the end-of-properties offset and
        triggers reading of the chunk size."""
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects
        # BUG FIX: explicit little-endian '<i' (TDMS is little-endian);
        # the bare 'i' format used native byte order.
        self.fileinfo["n_channels"] = (
            struct.unpack("<i", self._tdms_file.read(4))[0] - 2
        )
        # Read length of object path:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        # skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]

        # loop through and read each property (drop the type element;
        # loop variable renamed so the builtin `type` is not shadowed)
        properties = [self._read_property() for _ in range(var)]
        properties = {prop: value for (prop, _type, value) in properties}

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        # TODO: Add number of channels to properties
        return properties

    def _read_chunk_size(self):
        """Read the data chunk size from the TDMS file header."""
        if self._end_of_properties_offset is None:
            self._read_properties()

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # skip over Group Information:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # skip over first channel path and length of index information:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        self._data_type = TDS_DATA_TYPE.get(
            struct.unpack("<i", self._tdms_file.read(4))[0]
        )
        if self._data_type not in ("int16", "float32"):
            # BUG FIX: %-formatting instead of string concatenation, which
            # raised TypeError (masking this message) when the data type
            # code was unknown and self._data_type was None.
            raise Exception(
                "Unsupported TDMS data type: %s" % self._data_type)

        # Read Dimension of the raw data array (has to be 1):
        # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]

        self._chunk_size = struct.unpack("<i", self._tdms_file.read(4))[0]

    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        """
        Get a block of data from the TDMS file.
        first_ch -- The first channel to load
        last_ch -- The last channel to load (inclusive)
        first_s -- The first sample to load
        last_s -- The last sample to load (inclusive)

        Returns a (samples, channels) array of the file's data type.
        """
        if self._raw_data is None:
            self._initialise_data()
        # Normalise inclusive channel/sample bounds into half-open ranges:
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= self.n_channels:
            last_ch = self.n_channels
        else:
            last_ch += 1
        if last_s is None or last_s > self._channel_length:
            last_s = self._channel_length
        else:
            last_s += 1
        nch = num.int64(max(last_ch - first_ch, 0))
        ns = num.int64(max(last_s - first_s, 0))

        # Allocate output container
        data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
        if data.size == 0:
            return data

        # 1. Samples taken from the complete chunks of segment 1.
        first_blk = first_s // self._chunk_size
        last_blk = last_s // self._chunk_size
        last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
        nchunk = min(
            max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
        )
        first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
        last_s_1a = min(
            last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
        )
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)

        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d.shape = (self._chunk_size * nchunk, nch)
        # NOTE(review): the original additionally called
        # d.reshape((..., nch), order="F") and discarded the result — a
        # no-op; that dead statement was removed.
        data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        # 2. Samples from the trailing partial chunk of segment 1.
        first_s_1b = max(
            first_s - self._raw_data.shape[1] * self._chunk_size, 0
        )
        last_s_1b = min(
            last_s - self._raw_data.shape[1] * self._chunk_size,
            self._raw_last_chunk.shape[0],
        )
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[
                first_s_1b:last_s_1b, first_ch:last_ch
            ]

        # 3. Samples from the complete chunks of segment 2 (if mapped).
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            first_blk_2 = max(first_s_2 // self._chunk_size, 0)
            last_blk_2 = max(last_s_2 // self._chunk_size, 0)
            last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
            nchunk_2 = min(
                max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
            )
            first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
            last_s_2a = min(
                last_s_2 - first_blk_2 * self._chunk_size,
                nchunk_2 * self._chunk_size,
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw_data2[
                    :, first_blk_2:last_full_blk_2, first_ch:last_ch
                ].reshape((self._chunk_size * nchunk_2, nch), order="F")[
                    first_s_2a:last_s_2a, :
                ]
        # 4. Samples from the trailing partial chunk of segment 2.
        if (
            first_s_2 > 0 or last_s_2 > 0
        ) and self._raw2_last_chunk is not None:
            first_s_2b = max(
                first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
            )
            last_s_2b = min(
                last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
                self._raw2_last_chunk.shape[0],
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[
                    first_s_2b:last_s_2b, first_ch:last_ch
                ]
        # 5. `data` was filled in place; this guard is kept from the
        # original (unreachable after the early return above).
        if data.size == 0:
            data = data.reshape(0, 0)
        return data

    def _initialise_data(self):
        """Initialise the memory map for the data array.

        Maps segment 1 (complete chunks plus a trailing partial chunk)
        and, when present, segment 2 the same way.
        """
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        rdo = num.int64(self.fileinfo["raw_data_offset"])
        nch = num.int64(self.n_channels)

        # TODO: Support streaming file type?
        # TODO: Is this a valid calculation for ChannelLength?
        nso = self.fileinfo["next_segment_offset"]
        self._seg1_length = num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize
        )
        self._channel_length = self._seg1_length

        if self.fileinfo["decimated"]:
            n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
            ax_ord = "C"
        else:
            n_complete_blk = 0
            ax_ord = "F"
        self._raw_data = num.ndarray(
            (n_complete_blk, nch, self._chunk_size),
            dtype=self._data_type,
            buffer=dmap,
            offset=rdo,
        )
        # Rotate the axes to [chunk_size, nblk, nch]
        self._raw_data = num.rollaxis(self._raw_data, 2)
        additional_samples = num.int64(
            self._seg1_length - n_complete_blk * self._chunk_size
        )
        additional_samples_offset = (
            rdo
            + n_complete_blk
            * nch
            * self._chunk_size
            * num.dtype(self._data_type).itemsize
        )
        self._raw_last_chunk = num.ndarray(
            (nch, additional_samples),
            dtype=self._data_type,
            buffer=dmap,
            offset=additional_samples_offset,
            order=ax_ord,
        )
        # Rotate the axes to [samples, nch]
        self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)

        if self.file_size == nso:
            self._seg2_length = 0
        else:
            # Read the second segment's lead-in offsets (skip tag/toc/ver):
            self._tdms_file.seek(nso + 12, 0)
            (seg2_nso, seg2_rdo) = struct.unpack(
                "<qq", self._tdms_file.read(2 * 8)
            )
            self._seg2_length = (
                (seg2_nso - seg2_rdo)
                / nch
                / num.dtype(self._data_type).itemsize
            )
            if self.fileinfo["decimated"]:
                n_complete_blk2 = num.int64(
                    self._seg2_length / self._chunk_size)
            else:
                n_complete_blk2 = num.int64(0)
            self._raw_data2 = num.ndarray(
                (n_complete_blk2, nch, self._chunk_size),
                dtype=self._data_type,
                buffer=dmap,
                offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
            )
            self._raw_data2 = num.rollaxis(self._raw_data2, 2)
            additional_samples = num.int64(
                self._seg2_length - n_complete_blk2 * self._chunk_size
            )
            additional_samples_offset = (
                nso
                + LEAD_IN_LENGTH
                + seg2_rdo
                + n_complete_blk2
                * nch
                * self._chunk_size
                * num.dtype(self._data_type).itemsize
            )
            self._raw2_last_chunk = num.ndarray(
                (nch, additional_samples),
                dtype=self._data_type,
                buffer=dmap,
                offset=additional_samples_offset,
                order=ax_ord,
            )
            # Rotate the axes to [samples, nch]
            self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)

            if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
                pass
                # raise Exception('Second segment contains some data, \
                #                 not currently supported')
            self._channel_length = self._seg1_length + self._seg2_length
        # else:
        #     print "Not decimated"
        #     raise Exception('Reading file with decimated flag not set is not'
        #                     ' supported yet')
# Mapping of normalized metadata keys to the raw TDMS property names.
# Entries mapped to None ('channel', 'unit') have no TDMS counterpart and
# are filled in elsewhere.
# NOTE(review): the 'SystemInfomation' spelling matches the vendor's
# on-disk property names -- do not "fix" it.
META_KEYS = {
    'measure_length': 'MeasureLength[m]',
    'start_position': 'StartPosition[m]',
    'spatial_resolution': 'SpatialResolution[m]',
    'fibre_index': 'FibreIndex',
    'unit_calibration': 'Unit Calibration (nm)',
    'start_distance': 'Start Distance (m)',
    'stop_distance': 'Stop Distance (m)',
    'normalization': 'Normalization',
    'decimation_filter': 'Decimation Filter',
    'gauge_length': 'GaugeLength',
    'norm_offset': 'Norm Offset',
    'source_mode': 'Source Mode',
    'time_decimation': 'Time Decimation',
    'zero_offset': 'Zero Offset (m)',
    'p_parameter': 'P',
    'p_coefficients': 'P Coefficients',
    'idas_version': 'iDASVersion',
    'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
    'receiver_gain': 'Receiver Gain',
    'continuous_mode': 'Continuous Mode',
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',
    'channel': None,
    'unit': None
}
def get_meta(tdms_properties):
    """Derive ``(deltat, tmin, fibre_meta)`` from raw TDMS properties.

    tdms_properties -- property dict as returned by
        TdmsReader.get_properties().

    Returns the sampling interval [s], the start time as a POSIX
    timestamp, and a dict of fibre metadata remapped through META_KEYS
    (missing entries default to -1).
    """
    prop = tdms_properties

    deltat = 1. / prop['SamplingFrequency[Hz]']
    tmin = prop['GPSTimeStamp'].timestamp()

    fibre_meta = {key: prop.get(key_map, -1)
                  for key, key_map in META_KEYS.items()
                  if key_map is not None}

    # Coefficient lists are stored as tab- or semicolon-separated strings.
    coeff = fibre_meta['p_coefficients']
    try:
        coeff = tuple(map(float, coeff.split('\t')))
    except ValueError:
        coeff = tuple(map(float, coeff.split(';')))
    # BUG FIX: the parsed coefficients were never written back; the raw
    # string stayed in 'p_coefficients'.
    fibre_meta['p_coefficients'] = coeff

    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))
    # BUG FIX: original assigned `coeff` here, discarding the parsed gain.
    fibre_meta['receiver_gain'] = gain

    fibre_meta['unit'] = 'radians'

    return deltat, tmin, fibre_meta
def iload(filename, load_data=True):
    """Yield one pyrocko Trace per channel of the TDMS file *filename*.

    load_data -- when False, traces carry no sample data (ydata is None).
    """
    reader = TdmsReader(filename)
    deltat, tmin, meta = get_meta(reader.get_properties())

    if load_data:
        data = reader.get_data().T.copy()
    else:
        data = None

    # Same sample count for every channel; compute the end time once.
    nsamples = reader.channel_length
    tmax = tmin + (nsamples - 1) * deltat

    for icha in range(reader.n_channels):
        assert icha < 99999

        channel_meta = dict(meta)
        channel_meta['channel'] = icha

        tr = trace.Trace(
            network='DA',
            station='%05i' % icha,
            ydata=None,
            deltat=deltat,
            tmin=tmin,
            tmax=tmax,
            meta=channel_meta)

        if data is not None:
            tr.set_ydata(data[icha])

        yield tr
def detect(first512):
    """Return True if *first512* (bytes) looks like the start of a TDMS file.

    The TDMS lead-in begins with the fixed 4-byte tag b'TDSm'; the fifth
    byte is the first byte of the table-of-contents bitmask and is not
    constant. BUG FIX: the original matched the literal b'TDSm.' (tag plus
    a literal '.' byte), which rejected valid TDMS files; this also makes
    detect() consistent with the tag check in TdmsReader.__init__.
    """
    return first512.startswith(b'TDSm')