Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/io/tdms_idas.py: 76%
231 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-23 12:35 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-23 12:35 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Reader for `Silixia iDAS
8<https://silixa.com/technology/idas-intelligent-distributed-acoustic-sensor/>`_
9TDMS files.
10'''
12import logging
13import os.path as op
14import struct
15import datetime
16import mmap
17import numpy as num
19from pyrocko import trace
21logger = logging.getLogger(__name__)
24def write_property_dict(prop_dict, out_file):
25 from pprint import pformat
27 f = open(out_file, 'w')
28 f.write('tdms_property_map=')
29 f.write(pformat(prop_dict))
30 f.close()
33def type_not_supported(vargin):
34 '''Function raises a NotImplementedException.'''
35 raise NotImplementedError('Reading of this tdsDataType is not implemented')
38def parse_time_stamp(fractions, seconds):
39 '''
40 Convert time TDMS time representation to datetime
41 fractions -- fractional seconds (2^-64)
42 seconds -- The number of seconds since 1/1/1904
43 @rtype : datetime.datetime
44 '''
45 if (
46 fractions is not None
47 and seconds is not None
48 and fractions + seconds > 0
49 ):
50 return datetime.timedelta(
51 0, fractions * 2 ** -64 + seconds
52 ) + datetime.datetime(1904, 1, 1)
53 else:
54 return None
57# Enum mapping TDM data types to description string, numpy type where exists
58# See Ref[2] for enum values
59TDS_DATA_TYPE = dict(
60 {
61 0x00: 'void', # tdsTypeVoid
62 0x01: 'int8', # tdsTypeI8
63 0x02: 'int16', # tdsTypeI16
64 0x03: 'int32', # tdsTypeI32
65 0x04: 'int64', # tdsTypeI64
66 0x05: 'uint8', # tdsTypeU8
67 0x06: 'uint16', # tdsTypeU16
68 0x07: 'uint32', # tdsTypeU32
69 0x08: 'uint64', # tdsTypeU64
70 0x09: 'float32', # tdsTypeSingleFloat
71 0x0A: 'float64', # tdsTypeDoubleFloat
72 0x0B: 'float128', # tdsTypeExtendedFloat
73 0x19: 'singleFloatWithUnit', # tdsTypeSingleFloatWithUnit
74 0x1A: 'doubleFloatWithUnit', # tdsTypeDoubleFloatWithUnit
75 0x1B: 'extendedFloatWithUnit', # tdsTypeExtendedFloatWithUnit
76 0x20: 'str', # tdsTypeString
77 0x21: 'bool', # tdsTypeBoolean
78 0x44: 'datetime', # tdsTypeTimeStamp
79 0xFFFFFFFF: 'raw', # tdsTypeDAQmxRawData
80 }
81)
83# Function mapping for reading TDMS data types
84TDS_READ_VAL = dict(
85 {
86 'void': lambda f: None, # tdsTypeVoid
87 'int8': lambda f: struct.unpack('<b', f.read(1))[0],
88 'int16': lambda f: struct.unpack('<h', f.read(2))[0],
89 'int32': lambda f: struct.unpack('<i', f.read(4))[0],
90 'int64': lambda f: struct.unpack('<q', f.read(8))[0],
91 'uint8': lambda f: struct.unpack('<B', f.read(1))[0],
92 'uint16': lambda f: struct.unpack('<H', f.read(2))[0],
93 'uint32': lambda f: struct.unpack('<I', f.read(4))[0],
94 'uint64': lambda f: struct.unpack('<Q', f.read(8))[0],
95 'float32': lambda f: struct.unpack('<f', f.read(4))[0],
96 'float64': lambda f: struct.unpack('<d', f.read(8))[0],
97 'float128': type_not_supported,
98 'singleFloatWithUnit': type_not_supported,
99 'doubleFloatWithUnit': type_not_supported,
100 'extendedFloatWithUnit': type_not_supported,
101 'str': lambda f: f.read(struct.unpack('<i', f.read(4))[0]),
102 'bool': lambda f: struct.unpack('<?', f.read(1))[0],
103 'datetime': lambda f: parse_time_stamp(
104 struct.unpack('<Q', f.read(8))[0],
105 struct.unpack('<q', f.read(8))[0],
106 ),
107 'raw': type_not_supported,
108 }
109)
111DECIMATE_MASK = 0b00100000
112LEAD_IN_LENGTH = 28
113FILEINFO_NAMES = (
114 'file_tag',
115 'toc',
116 'version',
117 'next_segment_offset',
118 'raw_data_offset',
119)
122class TdmsReader(object):
123 '''A TDMS file reader object for reading properties and data'''
125 def __init__(self, filename):
126 self._properties = None
127 self._end_of_properties_offset = None
128 self._data_type = None
129 self._chunk_size = None
131 self._raw_data = None
132 self._raw_data2 = None # The mapped data in the 'Next Segment'
133 self._raw_last_chunk = None
134 self._raw2_last_chunk = None
136 self.file_size = op.getsize(filename)
137 self._channel_length = None
138 self._seg1_length = None
139 self._seg2_length = None
141 # TODO: Error if file not big enough to hold header
142 self._tdms_file = open(filename, 'rb')
143 # Read lead in (28 bytes):
144 lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
145 # lead_in is 28 bytes:
146 # [string of length 4][int32][int32][int64][int64]
147 fields = struct.unpack('<4siiQQ', lead_in)
149 if fields[0].decode() not in 'TDSm':
150 msg = 'Not a TDMS file (TDSm tag not found)'
151 raise (TypeError, msg)
153 self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
154 self.fileinfo['decimated'] = not bool(
155 self.fileinfo['toc'] & DECIMATE_MASK
156 )
157 # Make offsets relative to beginning of file:
158 self.fileinfo['next_segment_offset'] += LEAD_IN_LENGTH
159 self.fileinfo['raw_data_offset'] += LEAD_IN_LENGTH
160 self.fileinfo['file_size'] = op.getsize(self._tdms_file.name)
162 # TODO: Validate lead in:
163 if self.fileinfo['next_segment_offset'] > self.file_size:
164 self.fileinfo['next_segment_offset'] = self.file_size
165 # raise(ValueError, "Next Segment Offset too large in TDMS header")
167 def __enter__(self):
168 return self
170 def __exit__(self, exc_type, exc_value, traceback):
171 self._tdms_file.close()
173 @property
174 def channel_length(self):
175 if self._properties is None:
176 self.get_properties()
178 rdo = num.int64(self.fileinfo['raw_data_offset'])
179 nch = num.int64(self.n_channels)
180 nso = self.fileinfo['next_segment_offset']
181 return num.int64(
182 (nso - rdo) / nch / num.dtype(self._data_type).itemsize)
184 @property
185 def n_channels(self):
186 if self._properties is None:
187 self.get_properties()
188 return self.fileinfo['n_channels']
190 def get_properties(self, mapped=False):
191 '''
192 Return a dictionary of properties. Read from file only if necessary.
193 '''
194 # Check if already hold properties in memory
195 if self._properties is None:
196 self._properties = self._read_properties()
197 return self._properties
199 def _read_property(self):
200 '''
201 Read a single property from the TDMS file.
202 Return the name, type and value of the property as a list.
203 '''
204 # Read length of object path:
205 var = struct.unpack('<i', self._tdms_file.read(4))[0]
206 # Read property name and type:
207 name, data_type = struct.unpack(
208 '<{0}si'.format(var), self._tdms_file.read(var + 4)
209 )
210 # Lookup function to read and parse property value based on type:
211 value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
212 name = name.decode()
213 if data_type == 32:
214 value = value.decode()
216 return name, data_type, value
218 def _read_properties(self):
219 '''Read the properties from the file'''
220 self._tdms_file.seek(LEAD_IN_LENGTH, 0)
221 # Number of channels is total objects - file objects - group objects
222 self.fileinfo['n_channels'] = (
223 struct.unpack('i', self._tdms_file.read(4))[0] - 2
224 )
225 # Read length of object path:
226 var = struct.unpack('<i', self._tdms_file.read(4))[0]
227 # skip over object path and raw data index:
228 self._tdms_file.seek(var + 4, 1)
229 # Read number of properties in this group:
230 var = struct.unpack('<i', self._tdms_file.read(4))[0]
232 # loop through and read each property
233 properties = [self._read_property() for _ in range(var)]
234 properties = {prop: value for (prop, type, value) in properties}
236 self._end_of_properties_offset = self._tdms_file.tell()
238 self._read_chunk_size()
239 # TODO: Add number of channels to properties
240 return properties
242 def _read_chunk_size(self):
243 '''Read the data chunk size from the TDMS file header.'''
244 if self._end_of_properties_offset is None:
245 self._read_properties()
247 self._tdms_file.seek(self._end_of_properties_offset, 0)
249 # skip over Group Information:
250 var = struct.unpack('<i', self._tdms_file.read(4))[0]
251 self._tdms_file.seek(var + 8, 1)
253 # skip over first channel path and length of index information:
254 var = struct.unpack('<i', self._tdms_file.read(4))[0]
255 self._tdms_file.seek(var + 4, 1)
257 self._data_type = TDS_DATA_TYPE.get(
258 struct.unpack('<i', self._tdms_file.read(4))[0]
259 )
260 if self._data_type not in ('int16', 'float32'):
261 raise Exception('Unsupported TDMS data type: ' + self._data_type)
263 # Read Dimension of the raw data array (has to be 1):
264 # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]
266 self._chunk_size = struct.unpack('<i', self._tdms_file.read(4))[0]
268 def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
269 '''
270 Get a block of data from the TDMS file.
271 first_ch -- The first channel to load
272 last_ch -- The last channel to load
273 first_s -- The first sample to load
274 last_s -- The last sample to load
275 '''
276 if self._raw_data is None:
277 self._initialise_data()
278 if first_ch is None or first_ch < 0:
279 first_ch = 0
280 if last_ch is None or last_ch >= self.n_channels:
281 last_ch = self.n_channels
282 else:
283 last_ch += 1
284 if last_s is None or last_s > self._channel_length:
285 last_s = self._channel_length
286 else:
287 last_s += 1
288 nch = num.int64(max(last_ch - first_ch, 0))
289 ns = num.int64(max(last_s - first_s, 0))
291 # Allocate output container
292 data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
293 if data.size == 0:
294 return data
296 # 1. Index first block & reshape?
297 first_blk = first_s // self._chunk_size
298 last_blk = last_s // self._chunk_size
299 last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
300 nchunk = min(
301 max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
302 )
303 first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
304 last_s_1a = min(
305 last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
306 )
307 ind_s = 0
308 ind_e = ind_s + max(last_s_1a - first_s_1a, 0)
310 d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
311 d.shape = (self._chunk_size * nchunk, nch)
312 d.reshape((self._chunk_size * nchunk, nch), order='F')
313 data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]
315 # 2. Index first additional samples
316 first_s_1b = max(
317 first_s - self._raw_data.shape[1] * self._chunk_size, 0
318 )
319 last_s_1b = min(
320 last_s - self._raw_data.shape[1] * self._chunk_size,
321 self._raw_last_chunk.shape[0],
322 )
323 ind_s = ind_e
324 ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
325 # data_1b = self._raw_last_chunk[first_s_1b:last_s_1b,first_ch:last_ch]
326 if ind_e > ind_s:
327 data[ind_s:ind_e, :] = self._raw_last_chunk[
328 first_s_1b:last_s_1b, first_ch:last_ch
329 ]
331 # 3. Index second block
332 first_s_2 = max(first_s - self._seg1_length, 0)
333 last_s_2 = last_s - self._seg1_length
334 if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
335 first_blk_2 = max(first_s_2 // self._chunk_size, 0)
336 last_blk_2 = max(last_s_2 // self._chunk_size, 0)
337 last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
338 nchunk_2 = min(
339 max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
340 )
341 first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
342 last_s_2a = min(
343 last_s_2 - first_blk_2 * self._chunk_size,
344 nchunk_2 * self._chunk_size,
345 )
346 ind_s = ind_e
347 ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
348 # data_2a = self._raw_data2[:, first_blk_2:last_full_blk_2,
349 # first_ch:last_ch]\
350 # .reshape((self._chunk_size*nchunk_2, nch), order='F')\
351 # [first_s_2a:last_s_2a, :]
352 if ind_e > ind_s:
353 data[ind_s:ind_e, :] = self._raw_data2[
354 :, first_blk_2:last_full_blk_2, first_ch:last_ch
355 ].reshape((self._chunk_size * nchunk_2, nch), order='F')[
356 first_s_2a:last_s_2a, :
357 ]
358 # 4. Index second additional samples
359 if (
360 first_s_2 > 0 or last_s_2 > 0
361 ) and self._raw2_last_chunk is not None:
362 first_s_2b = max(
363 first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
364 )
365 last_s_2b = min(
366 last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
367 self._raw2_last_chunk.shape[0],
368 )
369 ind_s = ind_e
370 ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
371 # data_2b = \
372 # self._raw2_last_chunk[first_s_2b:last_s_2b,first_ch:last_ch]
373 if ind_e > ind_s:
374 data[ind_s:ind_e, :] = self._raw2_last_chunk[
375 first_s_2b:last_s_2b, first_ch:last_ch
376 ]
377 # 5. Concatenate blocks
378 # data = num.concatenate((data_1a, data_1b, data_2a, data_2b))
379 if data.size == 0:
380 data = data.reshape(0, 0)
381 return data
383 def _initialise_data(self):
384 '''Initialise the memory map for the data array.'''
385 if self._chunk_size is None:
386 self._read_chunk_size()
388 dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
389 rdo = num.int64(self.fileinfo['raw_data_offset'])
390 nch = num.int64(self.n_channels)
392 # TODO: Support streaming file type?
393 # TODO: Is this a valid calculation for ChannelLength?
394 nso = self.fileinfo['next_segment_offset']
395 self._seg1_length = num.int64(
396 (nso - rdo) / nch / num.dtype(self._data_type).itemsize
397 )
398 self._channel_length = self._seg1_length
400 if self.fileinfo['decimated']:
401 n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
402 ax_ord = 'C'
403 else:
404 n_complete_blk = 0
405 ax_ord = 'F'
406 self._raw_data = num.ndarray(
407 (n_complete_blk, nch, self._chunk_size),
408 dtype=self._data_type,
409 buffer=dmap,
410 offset=rdo,
411 )
412 # Rotate the axes to [chunk_size, nblk, nch]
413 self._raw_data = num.rollaxis(self._raw_data, 2)
414 additional_samples = num.int64(
415 self._seg1_length - n_complete_blk * self._chunk_size
416 )
417 additional_samples_offset = (
418 rdo
419 + n_complete_blk
420 * nch
421 * self._chunk_size
422 * num.dtype(self._data_type).itemsize
423 )
424 self._raw_last_chunk = num.ndarray(
425 (nch, additional_samples),
426 dtype=self._data_type,
427 buffer=dmap,
428 offset=additional_samples_offset,
429 order=ax_ord,
430 )
431 # Rotate the axes to [samples, nch]
432 self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)
434 if self.file_size == nso:
435 self._seg2_length = 0
436 else:
437 self._tdms_file.seek(nso + 12, 0)
438 (seg2_nso, seg2_rdo) = struct.unpack(
439 '<qq', self._tdms_file.read(2 * 8)
440 )
441 self._seg2_length = (
442 (seg2_nso - seg2_rdo)
443 / nch
444 / num.dtype(self._data_type).itemsize
445 )
446 if self.fileinfo['decimated']:
447 n_complete_blk2 = num.int64(
448 self._seg2_length / self._chunk_size)
449 else:
450 n_complete_blk2 = num.int64(0)
451 self._raw_data2 = num.ndarray(
452 (n_complete_blk2, nch, self._chunk_size),
453 dtype=self._data_type,
454 buffer=dmap,
455 offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
456 )
457 self._raw_data2 = num.rollaxis(self._raw_data2, 2)
458 additional_samples = num.int64(
459 self._seg2_length - n_complete_blk2 * self._chunk_size
460 )
461 additional_samples_offset = (
462 nso
463 + LEAD_IN_LENGTH
464 + seg2_rdo
465 + n_complete_blk2
466 * nch
467 * self._chunk_size
468 * num.dtype(self._data_type).itemsize
469 )
470 self._raw2_last_chunk = num.ndarray(
471 (nch, additional_samples),
472 dtype=self._data_type,
473 buffer=dmap,
474 offset=additional_samples_offset,
475 order=ax_ord,
476 )
477 # Rotate the axes to [samples, nch]
478 self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)
480 if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
481 pass
482 # raise Exception('Second segment contains some data, \
483 # not currently supported')
484 self._channel_length = self._seg1_length + self._seg2_length
485 # else:
486 # print "Not decimated"
487 # raise Exception('Reading file with decimated flag not set is not'
488 # ' supported yet')
491META_KEYS = {
492 'measure_length': 'MeasureLength[m]',
493 'start_position': 'StartPosition[m]',
494 'spatial_resolution': 'SpatialResolution[m]',
495 'fibre_index': 'FibreIndex',
496 'unit_calibration': 'Unit Calibration (nm)',
497 'start_distance': 'Start Distance (m)',
498 'stop_distance': 'Stop Distance (m)',
499 'normalization': 'Normalization',
500 'decimation_filter': 'Decimation Filter',
501 'gauge_length': 'GaugeLength',
502 'norm_offset': 'Norm Offset',
503 'source_mode': 'Source Mode',
504 'time_decimation': 'Time Decimation',
505 'zero_offset': 'Zero Offset (m)',
506 'p_parameter': 'P',
507 'p_coefficients': 'P Coefficients',
508 'idas_version': 'iDASVersion',
509 'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
510 'receiver_gain': 'Receiver Gain',
511 'continuous_mode': 'Continuous Mode',
512 'geo_lat': 'SystemInfomation.GPS.Latitude',
513 'geo_lon': 'SystemInfomation.GPS.Longitude',
514 'geo_elevation': 'SystemInfomation.GPS.Altitude',
516 'channel': None,
517 'unit': None
518}
521def get_meta(tdms_properties):
522 prop = tdms_properties
524 deltat = 1. / prop['SamplingFrequency[Hz]']
525 tmin = prop['GPSTimeStamp'].timestamp()
527 fibre_meta = {key: prop.get(key_map, -1)
528 for key, key_map in META_KEYS.items()
529 if key_map is not None}
531 coeff = fibre_meta['p_coefficients']
532 try:
533 coeff = tuple(map(float, coeff.split('\t')))
534 except ValueError:
535 coeff = tuple(map(float, coeff.split(';')))
537 gain = fibre_meta['receiver_gain']
538 try:
539 gain = tuple(map(float, gain.split('\t')))
540 except ValueError:
541 gain = tuple(map(float, gain.split(';')))
542 fibre_meta['receiver_gain'] = coeff
544 fibre_meta['unit'] = 'radians'
546 return deltat, tmin, fibre_meta
549def iload(filename, load_data=True):
550 tdms = TdmsReader(filename)
551 deltat, tmin, meta = get_meta(tdms.get_properties())
553 data = tdms.get_data().T.copy() if load_data else None
555 for icha in range(tdms.n_channels):
556 meta_cha = meta.copy()
558 assert icha < 99999
559 station = '%05i' % icha
560 meta_cha['channel'] = icha
562 nsamples = tdms.channel_length
564 tr = trace.Trace(
565 network='DA',
566 station=station,
567 ydata=None,
568 deltat=deltat,
569 tmin=tmin,
570 tmax=tmin + (nsamples - 1) * deltat,
571 meta=meta_cha)
573 if data is not None:
574 tr.set_ydata(data[icha])
576 yield tr
579def detect(first512):
580 return first512.startswith(b'TDSm.')