# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------

'''
Reader for `Silixa iDAS
<https://silixa.com/technology/idas-intelligent-distributed-acoustic-sensor/>`_
TDMS files.
'''

import logging
import os.path as op
import struct
import datetime
import mmap
import numpy as num

from pyrocko import trace

logger = logging.getLogger(__name__)


def write_property_dict(prop_dict, out_file):
    from pprint import pformat

    with open(out_file, 'w') as f:
        f.write('tdms_property_map=')
        f.write(pformat(prop_dict))


def type_not_supported(vargin):
    '''Raise NotImplementedError for unsupported TDMS data types.'''
    raise NotImplementedError('Reading of this tdsDataType is not implemented')


def parse_time_stamp(fractions, seconds):
    '''
    Convert a TDMS timestamp to a datetime.datetime.

    fractions -- fractional seconds (in units of 2**-64 s)
    seconds -- number of seconds since 1904-01-01
    @rtype : datetime.datetime
    '''
    if (
        fractions is not None
        and seconds is not None
        and fractions + seconds > 0
    ):
        return datetime.timedelta(
            0, fractions * 2 ** -64 + seconds
        ) + datetime.datetime(1904, 1, 1)
    else:
        return None
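
# Illustrative check of the conversion above: a fractions value of 2**63
# corresponds to exactly half a second past the 1904-01-01 epoch.
#
#   >>> parse_time_stamp(2 ** 63, 0)
#   datetime.datetime(1904, 1, 1, 0, 0, 0, 500000)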


# Mapping of TDMS data type enum values to description strings and numpy
# types where they exist. See Ref[2] for enum values.
TDS_DATA_TYPE = {
    0x00: 'void',  # tdsTypeVoid
    0x01: 'int8',  # tdsTypeI8
    0x02: 'int16',  # tdsTypeI16
    0x03: 'int32',  # tdsTypeI32
    0x04: 'int64',  # tdsTypeI64
    0x05: 'uint8',  # tdsTypeU8
    0x06: 'uint16',  # tdsTypeU16
    0x07: 'uint32',  # tdsTypeU32
    0x08: 'uint64',  # tdsTypeU64
    0x09: 'float32',  # tdsTypeSingleFloat
    0x0A: 'float64',  # tdsTypeDoubleFloat
    0x0B: 'float128',  # tdsTypeExtendedFloat
    0x19: 'singleFloatWithUnit',  # tdsTypeSingleFloatWithUnit
    0x1A: 'doubleFloatWithUnit',  # tdsTypeDoubleFloatWithUnit
    0x1B: 'extendedFloatWithUnit',  # tdsTypeExtendedFloatWithUnit
    0x20: 'str',  # tdsTypeString
    0x21: 'bool',  # tdsTypeBoolean
    0x44: 'datetime',  # tdsTypeTimeStamp
    0xFFFFFFFF: 'raw',  # tdsTypeDAQmxRawData
}


# Function mapping for reading TDMS data types
TDS_READ_VAL = {
    'void': lambda f: None,  # tdsTypeVoid
    'int8': lambda f: struct.unpack('<b', f.read(1))[0],
    'int16': lambda f: struct.unpack('<h', f.read(2))[0],
    'int32': lambda f: struct.unpack('<i', f.read(4))[0],
    'int64': lambda f: struct.unpack('<q', f.read(8))[0],
    'uint8': lambda f: struct.unpack('<B', f.read(1))[0],
    'uint16': lambda f: struct.unpack('<H', f.read(2))[0],
    'uint32': lambda f: struct.unpack('<I', f.read(4))[0],
    'uint64': lambda f: struct.unpack('<Q', f.read(8))[0],
    'float32': lambda f: struct.unpack('<f', f.read(4))[0],
    'float64': lambda f: struct.unpack('<d', f.read(8))[0],
    'float128': type_not_supported,
    'singleFloatWithUnit': type_not_supported,
    'doubleFloatWithUnit': type_not_supported,
    'extendedFloatWithUnit': type_not_supported,
    'str': lambda f: f.read(struct.unpack('<i', f.read(4))[0]),
    'bool': lambda f: struct.unpack('<?', f.read(1))[0],
    'datetime': lambda f: parse_time_stamp(
        struct.unpack('<Q', f.read(8))[0],
        struct.unpack('<q', f.read(8))[0],
    ),
    'raw': type_not_supported,
}
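
# Illustrative sketch of how the two tables combine: look up the type name
# from the enum value, then read one little-endian value from a file-like
# object positioned at the raw bytes.
#
#   >>> import io
#   >>> TDS_READ_VAL[TDS_DATA_TYPE[0x03]](io.BytesIO(struct.pack('<i', 42)))
#   42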


DECIMATE_MASK = 0b00100000
LEAD_IN_LENGTH = 28
FILEINFO_NAMES = (
    'file_tag',
    'toc',
    'version',
    'next_segment_offset',
    'raw_data_offset',
)


class TdmsReader(object):
    '''A TDMS file reader object for reading properties and data.'''

    def __init__(self, filename):
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = op.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        # TODO: Error if file not big enough to hold header
        self._tdms_file = open(filename, 'rb')
        # Read lead-in (28 bytes):
        lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
        # lead_in is 28 bytes:
        # [string of length 4][int32][int32][uint64][uint64]
        fields = struct.unpack('<4siiQQ', lead_in)

        if fields[0].decode() != 'TDSm':
            msg = 'Not a TDMS file (TDSm tag not found)'
            raise TypeError(msg)

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        self.fileinfo['decimated'] = not bool(
            self.fileinfo['toc'] & DECIMATE_MASK
        )
        # Make offsets relative to beginning of file:
        self.fileinfo['next_segment_offset'] += LEAD_IN_LENGTH
        self.fileinfo['raw_data_offset'] += LEAD_IN_LENGTH
        self.fileinfo['file_size'] = op.getsize(self._tdms_file.name)

        # TODO: Validate lead-in:
        if self.fileinfo['next_segment_offset'] > self.file_size:
            self.fileinfo['next_segment_offset'] = self.file_size
            # raise ValueError('Next Segment Offset too large in TDMS header')
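
    # Illustrative check of the lead-in layout above: the struct format
    # accounts for the 28-byte LEAD_IN_LENGTH.
    #
    #   >>> struct.calcsize('<4siiQQ')
    #   28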

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._tdms_file.close()

    @property
    def channel_length(self):
        if self._properties is None:
            self.get_properties()

        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)
        nso = self.fileinfo['next_segment_offset']
        return num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize)
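
    # Worked example of the formula above (hypothetical numbers): with
    # next_segment_offset - raw_data_offset = 8_000_000 bytes of raw data,
    # 1000 channels and int16 samples (2 bytes each), each channel holds
    # 8_000_000 / 1000 / 2 = 4000 samples.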

    @property
    def n_channels(self):
        if self._properties is None:
            self.get_properties()
        return self.fileinfo['n_channels']

    def get_properties(self, mapped=False):
        '''
        Return a dictionary of properties. Read from file only if necessary.
        '''
        # Return cached properties if they have already been read:
        if self._properties is None:
            self._properties = self._read_properties()
        return self._properties

    def _read_property(self):
        '''
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a tuple.
        '''
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack(
            '<{0}si'.format(var), self._tdms_file.read(var + 4)
        )
        # Lookup function to read and parse property value based on type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 0x20:  # tdsTypeString values arrive as bytes
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        '''Read the properties from the file.'''
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects
        self.fileinfo['n_channels'] = (
            struct.unpack('<i', self._tdms_file.read(4))[0] - 2
        )
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]

        # Loop through and read each property:
        properties = [self._read_property() for _ in range(var)]
        properties = {name: value for (name, _type, value) in properties}

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        # TODO: Add number of channels to properties
        return properties

    def _read_chunk_size(self):
        '''Read the data chunk size from the TDMS file header.'''
        if self._end_of_properties_offset is None:
            self._read_properties()

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # Skip over group information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # Skip over first channel path and length of index information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        self._data_type = TDS_DATA_TYPE.get(
            struct.unpack('<i', self._tdms_file.read(4))[0]
        )
        if self._data_type not in ('int16', 'float32'):
            raise Exception(
                'Unsupported TDMS data type: %s' % self._data_type)

        # Read dimension of the raw data array (has to be 1):
        # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]

        self._chunk_size = struct.unpack('<i', self._tdms_file.read(4))[0]

    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        '''
        Get a block of data from the TDMS file.
        first_ch -- The first channel to load
        last_ch -- The last channel to load (inclusive)
        first_s -- The first sample to load
        last_s -- The last sample to load (inclusive)
        '''
        if self._raw_data is None:
            self._initialise_data()
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= self.n_channels:
            last_ch = self.n_channels
        else:
            last_ch += 1
        if last_s is None or last_s > self._channel_length:
            last_s = self._channel_length
        else:
            last_s += 1
        nch = num.int64(max(last_ch - first_ch, 0))
        ns = num.int64(max(last_s - first_s, 0))

        # Allocate output container
        data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
        if data.size == 0:
            return data

        # 1. Index first block
        first_blk = first_s // self._chunk_size
        last_blk = last_s // self._chunk_size
        last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
        nchunk = min(
            max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
        )
        first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
        last_s_1a = min(
            last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
        )
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)

        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d.shape = (self._chunk_size * nchunk, nch)
        data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        # 2. Index first additional samples
        first_s_1b = max(
            first_s - self._raw_data.shape[1] * self._chunk_size, 0
        )
        last_s_1b = min(
            last_s - self._raw_data.shape[1] * self._chunk_size,
            self._raw_last_chunk.shape[0],
        )
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[
                first_s_1b:last_s_1b, first_ch:last_ch
            ]

        # 3. Index second block
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            first_blk_2 = max(first_s_2 // self._chunk_size, 0)
            last_blk_2 = max(last_s_2 // self._chunk_size, 0)
            last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
            nchunk_2 = min(
                max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
            )
            first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
            last_s_2a = min(
                last_s_2 - first_blk_2 * self._chunk_size,
                nchunk_2 * self._chunk_size,
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw_data2[
                    :, first_blk_2:last_full_blk_2, first_ch:last_ch
                ].reshape((self._chunk_size * nchunk_2, nch), order='F')[
                    first_s_2a:last_s_2a, :
                ]

        # 4. Index second additional samples
        if (
            first_s_2 > 0 or last_s_2 > 0
        ) and self._raw2_last_chunk is not None:
            first_s_2b = max(
                first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
            )
            last_s_2b = min(
                last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
                self._raw2_last_chunk.shape[0],
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[
                    first_s_2b:last_s_2b, first_ch:last_ch
                ]

        if data.size == 0:
            data = data.reshape(0, 0)
        return data
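
    # Illustrative usage sketch of get_data (hypothetical file name and
    # ranges; channel and sample ranges are inclusive):
    #
    #   reader = TdmsReader('example.tdms')
    #   block = reader.get_data(first_ch=0, last_ch=9, first_s=0, last_s=999)
    #   # -> ndarray of shape (1000, 10), dtype 'int16' or 'float32'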

    def _initialise_data(self):
        '''Initialise the memory map for the data array.'''
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)

        # TODO: Support streaming file type?
        # TODO: Is this a valid calculation for ChannelLength?
        nso = self.fileinfo['next_segment_offset']
        self._seg1_length = num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize
        )
        self._channel_length = self._seg1_length

        if self.fileinfo['decimated']:
            n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
            ax_ord = 'C'
        else:
            n_complete_blk = 0
            ax_ord = 'F'
        self._raw_data = num.ndarray(
            (n_complete_blk, nch, self._chunk_size),
            dtype=self._data_type,
            buffer=dmap,
            offset=rdo,
        )
        # Rotate the axes to [chunk_size, nblk, nch]
        self._raw_data = num.rollaxis(self._raw_data, 2)
        additional_samples = num.int64(
            self._seg1_length - n_complete_blk * self._chunk_size
        )
        additional_samples_offset = (
            rdo
            + n_complete_blk
            * nch
            * self._chunk_size
            * num.dtype(self._data_type).itemsize
        )
        self._raw_last_chunk = num.ndarray(
            (nch, additional_samples),
            dtype=self._data_type,
            buffer=dmap,
            offset=additional_samples_offset,
            order=ax_ord,
        )
        # Rotate the axes to [samples, nch]
        self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)

        if self.file_size == nso:
            self._seg2_length = 0
        else:
            self._tdms_file.seek(nso + 12, 0)
            (seg2_nso, seg2_rdo) = struct.unpack(
                '<qq', self._tdms_file.read(2 * 8)
            )
            self._seg2_length = (
                (seg2_nso - seg2_rdo)
                / nch
                / num.dtype(self._data_type).itemsize
            )
            if self.fileinfo['decimated']:
                n_complete_blk2 = num.int64(
                    self._seg2_length / self._chunk_size)
            else:
                n_complete_blk2 = num.int64(0)
            self._raw_data2 = num.ndarray(
                (n_complete_blk2, nch, self._chunk_size),
                dtype=self._data_type,
                buffer=dmap,
                offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
            )
            self._raw_data2 = num.rollaxis(self._raw_data2, 2)
            additional_samples = num.int64(
                self._seg2_length - n_complete_blk2 * self._chunk_size
            )
            additional_samples_offset = (
                nso
                + LEAD_IN_LENGTH
                + seg2_rdo
                + n_complete_blk2
                * nch
                * self._chunk_size
                * num.dtype(self._data_type).itemsize
            )
            self._raw2_last_chunk = num.ndarray(
                (nch, additional_samples),
                dtype=self._data_type,
                buffer=dmap,
                offset=additional_samples_offset,
                order=ax_ord,
            )
            # Rotate the axes to [samples, nch]
            self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)

            if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
                pass
                # raise Exception('Second segment contains some data, '
                #                 'not currently supported')

        self._channel_length = self._seg1_length + self._seg2_length


META_KEYS = {
    'measure_length': 'MeasureLength[m]',
    'start_position': 'StartPosition[m]',
    'spatial_resolution': 'SpatialResolution[m]',
    'fibre_index': 'FibreIndex',
    'unit_calibration': 'Unit Calibration (nm)',
    'start_distance': 'Start Distance (m)',
    'stop_distance': 'Stop Distance (m)',
    'normalization': 'Normalization',
    'decimation_filter': 'Decimation Filter',
    'gauge_length': 'GaugeLength',
    'norm_offset': 'Norm Offset',
    'source_mode': 'Source Mode',
    'time_decimation': 'Time Decimation',
    'zero_offset': 'Zero Offset (m)',
    'p_parameter': 'P',
    'p_coefficients': 'P Coefficients',
    'idas_version': 'iDASVersion',
    'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
    'receiver_gain': 'Receiver Gain',
    'continuous_mode': 'Continuous Mode',
    # 'SystemInfomation' (sic) is spelled as in the original key map:
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',

    'channel': None,
    'unit': None
}


def get_meta(tdms_properties):
    prop = tdms_properties

    deltat = 1. / prop['SamplingFrequency[Hz]']
    tmin = prop['GPSTimeStamp'].timestamp()

    fibre_meta = {key: prop.get(key_map, -1)
                  for key, key_map in META_KEYS.items()
                  if key_map is not None}

    # Coefficients are stored as tab- or semicolon-separated strings:
    coeff = fibre_meta['p_coefficients']
    try:
        coeff = tuple(map(float, coeff.split('\t')))
    except ValueError:
        coeff = tuple(map(float, coeff.split(';')))
    fibre_meta['p_coefficients'] = coeff

    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))
    fibre_meta['receiver_gain'] = gain

    fibre_meta['unit'] = 'radians'

    return deltat, tmin, fibre_meta
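
# Illustrative check of the coefficient parsing above:
#
#   >>> tuple(map(float, '1.5\t2.5'.split('\t')))
#   (1.5, 2.5)
#   >>> tuple(map(float, '1.5;2.5'.split(';')))
#   (1.5, 2.5)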


def iload(filename, load_data=True):
    tdms = TdmsReader(filename)
    deltat, tmin, meta = get_meta(tdms.get_properties())

    data = tdms.get_data().T.copy() if load_data else None

    for icha in range(tdms.n_channels):
        meta_cha = meta.copy()

        assert icha < 99999
        station = '%05i' % icha
        meta_cha['channel'] = icha

        nsamples = tdms.channel_length

        tr = trace.Trace(
            network='DA',
            station=station,
            ydata=None,
            deltat=deltat,
            tmin=tmin,
            tmax=tmin + (nsamples - 1) * deltat,
            meta=meta_cha)

        if data is not None:
            tr.set_ydata(data[icha])

        yield tr


def detect(first512):
    return first512.startswith(b'TDSm.')
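
# Minimal usage sketch (hypothetical file path; iload yields one pyrocko
# Trace per fibre channel):
#
#   for tr in iload('example.tdms'):
#       print(tr.station, tr.deltat, tr.tmin)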