import logging
import os.path as op
import struct
import datetime
import mmap
import numpy as num

from pyrocko import trace

logger = logging.getLogger(__name__)


def write_property_dict(prop_dict, out_file):
    '''Write a TDMS property dictionary to out_file as python source.'''
    from pprint import pformat

    with open(out_file, 'w') as f:
        f.write('tdms_property_map=')
        f.write(pformat(prop_dict))


def type_not_supported(vargin):
    '''Raise a NotImplementedError for unsupported tdsDataTypes.'''
    raise NotImplementedError('Reading of this tdsDataType is not implemented')


def parse_time_stamp(fractions, seconds):
    '''
    Convert a TDMS timestamp to a datetime object.

    fractions -- fractional seconds (in units of 2^-64 s)
    seconds -- number of seconds since 1904-01-01

    @rtype : datetime.datetime
    '''
    if (
        fractions is not None
        and seconds is not None
        and fractions + seconds > 0
    ):
        return datetime.timedelta(
            0, fractions * 2 ** -64 + seconds
        ) + datetime.datetime(1904, 1, 1)
    else:
        return None


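# A minimal usage sketch of parse_time_stamp (values chosen only for
# illustration): one full day after the TDMS epoch, and a half-second
# fractional part expressed in units of 2^-64 s.
#
#   >>> parse_time_stamp(0, 86400)
#   datetime.datetime(1904, 1, 2, 0, 0)
#   >>> parse_time_stamp(2 ** 63, 0)
#   datetime.datetime(1904, 1, 1, 0, 0, 0, 500000)
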

# Mapping of TDMS data type enum values to descriptive strings
# (NumPy dtype names where they exist). See Ref[2] for the enum values.
TDS_DATA_TYPE = {
    0x00: 'void',  # tdsTypeVoid
    0x01: 'int8',  # tdsTypeI8
    0x02: 'int16',  # tdsTypeI16
    0x03: 'int32',  # tdsTypeI32
    0x04: 'int64',  # tdsTypeI64
    0x05: 'uint8',  # tdsTypeU8
    0x06: 'uint16',  # tdsTypeU16
    0x07: 'uint32',  # tdsTypeU32
    0x08: 'uint64',  # tdsTypeU64
    0x09: 'float32',  # tdsTypeSingleFloat
    0x0A: 'float64',  # tdsTypeDoubleFloat
    0x0B: 'float128',  # tdsTypeExtendedFloat
    0x19: 'singleFloatWithUnit',  # tdsTypeSingleFloatWithUnit
    0x1A: 'doubleFloatWithUnit',  # tdsTypeDoubleFloatWithUnit
    0x1B: 'extendedFloatWithUnit',  # tdsTypeExtendedFloatWithUnit
    0x20: 'str',  # tdsTypeString
    0x21: 'bool',  # tdsTypeBoolean
    0x44: 'datetime',  # tdsTypeTimeStamp
    0xFFFFFFFF: 'raw',  # tdsTypeDAQmxRawData
}

# Function mapping for reading TDMS data types
TDS_READ_VAL = {
    'void': lambda f: None,  # tdsTypeVoid
    'int8': lambda f: struct.unpack('<b', f.read(1))[0],
    'int16': lambda f: struct.unpack('<h', f.read(2))[0],
    'int32': lambda f: struct.unpack('<i', f.read(4))[0],
    'int64': lambda f: struct.unpack('<q', f.read(8))[0],
    'uint8': lambda f: struct.unpack('<B', f.read(1))[0],
    'uint16': lambda f: struct.unpack('<H', f.read(2))[0],
    'uint32': lambda f: struct.unpack('<I', f.read(4))[0],
    'uint64': lambda f: struct.unpack('<Q', f.read(8))[0],
    'float32': lambda f: struct.unpack('<f', f.read(4))[0],
    'float64': lambda f: struct.unpack('<d', f.read(8))[0],
    'float128': type_not_supported,
    'singleFloatWithUnit': type_not_supported,
    'doubleFloatWithUnit': type_not_supported,
    'extendedFloatWithUnit': type_not_supported,
    'str': lambda f: f.read(struct.unpack('<i', f.read(4))[0]),
    'bool': lambda f: struct.unpack('<?', f.read(1))[0],
    'datetime': lambda f: parse_time_stamp(
        struct.unpack('<Q', f.read(8))[0],
        struct.unpack('<q', f.read(8))[0],
    ),
    'raw': type_not_supported,
}

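# A minimal sketch of how the two tables above compose when reading one
# property value; io.BytesIO stands in here for the open TDMS file object:
#
#   >>> import io
#   >>> buf = io.BytesIO(struct.pack('<i', 42))
#   >>> TDS_READ_VAL[TDS_DATA_TYPE[0x03]](buf)  # 0x03 -> 'int32'
#   42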

DECIMATE_MASK = 0b00100000
LEAD_IN_LENGTH = 28
FILEINFO_NAMES = (
    'file_tag',
    'toc',
    'version',
    'next_segment_offset',
    'raw_data_offset',
)


class TdmsReader(object):
    '''A TDMS file reader object for reading properties and data.'''

    def __init__(self, filename):
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = op.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        # TODO: Error if file not big enough to hold header
        self._tdms_file = open(filename, 'rb')
        # Read lead-in (28 bytes):
        lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
        # lead_in is 28 bytes:
        # [string of length 4][int32][int32][int64][int64]
        fields = struct.unpack('<4siiQQ', lead_in)

        if fields[0].decode() != 'TDSm':
            msg = 'Not a TDMS file (TDSm tag not found)'
            raise TypeError(msg)

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        self.fileinfo['decimated'] = not bool(
            self.fileinfo['toc'] & DECIMATE_MASK
        )
        # Make offsets relative to beginning of file:
        self.fileinfo['next_segment_offset'] += LEAD_IN_LENGTH
        self.fileinfo['raw_data_offset'] += LEAD_IN_LENGTH
        self.fileinfo['file_size'] = op.getsize(self._tdms_file.name)

        # TODO: Validate lead-in. For now, clamp an oversized offset:
        if self.fileinfo['next_segment_offset'] > self.file_size:
            self.fileinfo['next_segment_offset'] = self.file_size
            # raise ValueError('Next Segment Offset too large in TDMS header')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._tdms_file.close()

    @property
    def channel_length(self):
        if self._properties is None:
            self.get_properties()

        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)
        nso = self.fileinfo['next_segment_offset']
        return num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize)

    @property
    def n_channels(self):
        if self._properties is None:
            self.get_properties()
        return self.fileinfo['n_channels']

    def get_properties(self, mapped=False):
        '''
        Return a dictionary of properties. Read from file only if necessary.
        '''
        # Read from file only if the properties are not already cached:
        if self._properties is None:
            self._properties = self._read_properties()
        return self._properties

    def _read_property(self):
        '''
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a tuple.
        '''
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack(
            '<{0}si'.format(var), self._tdms_file.read(var + 4)
        )
        # Look up the function to read and parse the property value,
        # based on its type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 0x20:  # tdsTypeString
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        '''Read the properties from the file.'''
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects:
        self.fileinfo['n_channels'] = (
            struct.unpack('<i', self._tdms_file.read(4))[0] - 2
        )
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]

        # Loop through and read each property:
        properties = [self._read_property() for _ in range(var)]
        properties = {prop: value for (prop, _type, value) in properties}

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        # TODO: Add number of channels to properties
        return properties

    def _read_chunk_size(self):
        '''Read the data chunk size from the TDMS file header.'''
        if self._end_of_properties_offset is None:
            self._read_properties()

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # Skip over group information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # Skip over first channel path and length of index information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        self._data_type = TDS_DATA_TYPE.get(
            struct.unpack('<i', self._tdms_file.read(4))[0]
        )
        if self._data_type not in ('int16', 'float32'):
            raise Exception(
                'Unsupported TDMS data type: %s' % self._data_type)

        # Read dimension of the raw data array (has to be 1):
        # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]

        self._chunk_size = struct.unpack('<i', self._tdms_file.read(4))[0]

    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        '''
        Get a block of data from the TDMS file.

        first_ch -- The first channel to load
        last_ch -- The last channel to load
        first_s -- The first sample to load
        last_s -- The last sample to load
        '''
        if self._raw_data is None:
            self._initialise_data()
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= self.n_channels:
            last_ch = self.n_channels
        else:
            last_ch += 1
        if last_s is None or last_s > self._channel_length:
            last_s = self._channel_length
        else:
            last_s += 1
        nch = num.int64(max(last_ch - first_ch, 0))
        ns = num.int64(max(last_s - first_s, 0))

        # Allocate output container:
        data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
        if data.size == 0:
            return data

        # 1. Index first block:
        first_blk = first_s // self._chunk_size
        last_blk = last_s // self._chunk_size
        last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
        nchunk = min(
            max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
        )
        first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
        last_s_1a = min(
            last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
        )
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)

        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d = d.reshape((self._chunk_size * nchunk, nch), order='F')
        data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        # 2. Index first additional samples:
        first_s_1b = max(
            first_s - self._raw_data.shape[1] * self._chunk_size, 0
        )
        last_s_1b = min(
            last_s - self._raw_data.shape[1] * self._chunk_size,
            self._raw_last_chunk.shape[0],
        )
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[
                first_s_1b:last_s_1b, first_ch:last_ch
            ]

        # 3. Index second block:
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            first_blk_2 = max(first_s_2 // self._chunk_size, 0)
            last_blk_2 = max(last_s_2 // self._chunk_size, 0)
            last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
            nchunk_2 = min(
                max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
            )
            first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
            last_s_2a = min(
                last_s_2 - first_blk_2 * self._chunk_size,
                nchunk_2 * self._chunk_size,
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw_data2[
                    :, first_blk_2:last_full_blk_2, first_ch:last_ch
                ].reshape((self._chunk_size * nchunk_2, nch), order='F')[
                    first_s_2a:last_s_2a, :
                ]

        # 4. Index second additional samples:
        if (
            first_s_2 > 0 or last_s_2 > 0
        ) and self._raw2_last_chunk is not None:
            first_s_2b = max(
                first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
            )
            last_s_2b = min(
                last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
                self._raw2_last_chunk.shape[0],
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[
                    first_s_2b:last_s_2b, first_ch:last_ch
                ]

        if data.size == 0:
            data = data.reshape(0, 0)
        return data

    def _initialise_data(self):
        '''Initialise the memory map for the data array.'''
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        rdo = num.int64(self.fileinfo['raw_data_offset'])
        nch = num.int64(self.n_channels)

        # TODO: Support streaming file type?
        # TODO: Is this a valid calculation for ChannelLength?
        nso = self.fileinfo['next_segment_offset']
        self._seg1_length = num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize
        )
        self._channel_length = self._seg1_length

        if self.fileinfo['decimated']:
            n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
            ax_ord = 'C'
        else:
            n_complete_blk = 0
            ax_ord = 'F'
        self._raw_data = num.ndarray(
            (n_complete_blk, nch, self._chunk_size),
            dtype=self._data_type,
            buffer=dmap,
            offset=rdo,
        )
        # Rotate the axes to [chunk_size, nblk, nch]:
        self._raw_data = num.rollaxis(self._raw_data, 2)
        additional_samples = num.int64(
            self._seg1_length - n_complete_blk * self._chunk_size
        )
        additional_samples_offset = (
            rdo
            + n_complete_blk
            * nch
            * self._chunk_size
            * num.dtype(self._data_type).itemsize
        )
        self._raw_last_chunk = num.ndarray(
            (nch, additional_samples),
            dtype=self._data_type,
            buffer=dmap,
            offset=additional_samples_offset,
            order=ax_ord,
        )
        # Rotate the axes to [samples, nch]:
        self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)

        if self.file_size == nso:
            self._seg2_length = 0
        else:
            self._tdms_file.seek(nso + 12, 0)
            (seg2_nso, seg2_rdo) = struct.unpack(
                '<qq', self._tdms_file.read(2 * 8)
            )
            self._seg2_length = num.int64(
                (seg2_nso - seg2_rdo)
                / nch
                / num.dtype(self._data_type).itemsize
            )
            if self.fileinfo['decimated']:
                n_complete_blk2 = num.int64(
                    self._seg2_length / self._chunk_size)
            else:
                n_complete_blk2 = num.int64(0)
            self._raw_data2 = num.ndarray(
                (n_complete_blk2, nch, self._chunk_size),
                dtype=self._data_type,
                buffer=dmap,
                offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
            )
            self._raw_data2 = num.rollaxis(self._raw_data2, 2)
            additional_samples = num.int64(
                self._seg2_length - n_complete_blk2 * self._chunk_size
            )
            additional_samples_offset = (
                nso
                + LEAD_IN_LENGTH
                + seg2_rdo
                + n_complete_blk2
                * nch
                * self._chunk_size
                * num.dtype(self._data_type).itemsize
            )
            self._raw2_last_chunk = num.ndarray(
                (nch, additional_samples),
                dtype=self._data_type,
                buffer=dmap,
                offset=additional_samples_offset,
                order=ax_ord,
            )
            # Rotate the axes to [samples, nch]:
            self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)

        self._channel_length = self._seg1_length + self._seg2_length


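# A typical usage sketch for TdmsReader ('data.tdms' is a hypothetical file
# name; channel and sample bounds are inclusive):
#
#   with TdmsReader('data.tdms') as tdms:
#       props = tdms.get_properties()
#       data = tdms.get_data(first_ch=0, last_ch=9, first_s=0, last_s=999)
#       # data.shape == (1000, 10), dtype int16 or float32
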

META_KEYS = {
    'measure_length': 'MeasureLength[m]',
    'start_position': 'StartPosition[m]',
    'spatial_resolution': 'SpatialResolution[m]',
    'fibre_index': 'FibreIndex',
    'unit_calibration': 'Unit Calibration (nm)',
    'start_distance': 'Start Distance (m)',
    'stop_distance': 'Stop Distance (m)',
    'normalization': 'Normalization',
    'decimation_filter': 'Decimation Filter',
    'gauge_length': 'GaugeLength',
    'norm_offset': 'Norm Offset',
    'source_mode': 'Source Mode',
    'time_decimation': 'Time Decimation',
    'zero_offset': 'Zero Offset (m)',
    'p_parameter': 'P',
    'p_coefficients': 'P Coefficients',
    'idas_version': 'iDASVersion',
    'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
    'receiver_gain': 'Receiver Gain',
    'continuous_mode': 'Continuous Mode',
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',

    'channel': None,
    'unit': None,
}


def get_meta(tdms_properties):
    prop = tdms_properties

    deltat = 1. / prop['SamplingFrequency[Hz]']
    tmin = prop['GPSTimeStamp'].timestamp()

    fibre_meta = {key: prop.get(key_map, -1)
                  for key, key_map in META_KEYS.items()
                  if key_map is not None}

    # Coefficient lists are stored as tab- or semicolon-separated strings:
    coeff = fibre_meta['p_coefficients']
    try:
        coeff = tuple(map(float, coeff.split('\t')))
    except ValueError:
        coeff = tuple(map(float, coeff.split(';')))
    fibre_meta['p_coefficients'] = coeff

    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))
    fibre_meta['receiver_gain'] = gain

    fibre_meta['unit'] = 'radians'

    return deltat, tmin, fibre_meta


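# Sketch of the expected call pattern ('data.tdms' is hypothetical):
#
#   props = TdmsReader('data.tdms').get_properties()
#   deltat, tmin, meta = get_meta(props)
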

def iload(filename, load_data=True):
    tdms = TdmsReader(filename)
    deltat, tmin, meta = get_meta(tdms.get_properties())

    data = tdms.get_data().T.copy() if load_data else None

    for icha in range(tdms.n_channels):
        meta_cha = meta.copy()

        assert icha < 99999
        station = '%05i' % icha
        meta_cha['channel'] = icha

        nsamples = tdms.channel_length

        tr = trace.Trace(
            network='DA',
            station=station,
            ydata=None,
            deltat=deltat,
            tmin=tmin,
            tmax=tmin + (nsamples - 1) * deltat,
            meta=meta_cha)

        if data is not None:
            tr.set_ydata(data[icha])

        yield tr


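# Example driver for iload (a sketch; 'data.tdms' is hypothetical):
#
#   for tr in iload('data.tdms'):
#       print(tr.station, tr.tmin, tr.deltat)
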

def detect(first512):
    return first512.startswith(b'TDSm')