1import logging
2import os.path as op
3import struct
4import datetime
5import mmap
6import numpy as num
8from pyrocko import trace
10logger = logging.getLogger(__name__)
def write_property_dict(prop_dict, out_file):
    """Write *prop_dict* to *out_file* as an importable Python assignment.

    The resulting file contains a single statement of the form
    ``tdms_property_map=<pretty-printed dict>``.
    """
    from pprint import pformat

    # Context manager guarantees the handle is closed even if pformat or
    # write raises (original used bare open/close without try/finally).
    with open(out_file, "w") as f:
        f.write("tdms_property_map=")
        f.write(pformat(prop_dict))
def type_not_supported(vargin):
    """Placeholder reader: raise NotImplementedError for TDMS data types
    that this module cannot parse."""
    message = "Reading of this tdsDataType is not implemented"
    raise NotImplementedError(message)
def parse_time_stamp(fractions, seconds):
    """
    Convert time TDMS time representation to datetime
    fractions -- fractional seconds (2^-64)
    seconds -- The number of seconds since 1/1/1904
    @rtype : datetime.datetime

    Returns None when either component is missing or the combined
    timestamp is not positive.
    """
    # Guard clauses instead of one compound condition.
    if fractions is None or seconds is None:
        return None
    if fractions + seconds <= 0:
        return None

    tdms_epoch = datetime.datetime(1904, 1, 1)
    elapsed = datetime.timedelta(0, fractions * 2 ** -64 + seconds)
    return tdms_epoch + elapsed
# Enum mapping TDM data types to description string, numpy type where exists
# See Ref[2] for enum values
# (The redundant dict() wrapper around the literal was removed.)
TDS_DATA_TYPE = {
    0x00: "void",  # tdsTypeVoid
    0x01: "int8",  # tdsTypeI8
    0x02: "int16",  # tdsTypeI16
    0x03: "int32",  # tdsTypeI32
    0x04: "int64",  # tdsTypeI64
    0x05: "uint8",  # tdsTypeU8
    0x06: "uint16",  # tdsTypeU16
    0x07: "uint32",  # tdsTypeU32
    0x08: "uint64",  # tdsTypeU64
    0x09: "float32",  # tdsTypeSingleFloat
    0x0A: "float64",  # tdsTypeDoubleFloat
    0x0B: "float128",  # tdsTypeExtendedFloat
    0x19: "singleFloatWithUnit",  # tdsTypeSingleFloatWithUnit
    0x1A: "doubleFloatWithUnit",  # tdsTypeDoubleFloatWithUnit
    0x1B: "extendedFloatWithUnit",  # tdsTypeExtendedFloatWithUnit
    0x20: "str",  # tdsTypeString
    0x21: "bool",  # tdsTypeBoolean
    0x44: "datetime",  # tdsTypeTimeStamp
    0xFFFFFFFF: "raw",  # tdsTypeDAQmxRawData
}
# Function mapping for reading TDMS data types.
# Each reader takes the open binary file object, consumes the value's bytes
# and returns the parsed value. All TDMS scalars are little-endian ('<').
# (The redundant dict() wrapper around the literal was removed.)
TDS_READ_VAL = {
    "void": lambda f: None,  # tdsTypeVoid
    "int8": lambda f: struct.unpack("<b", f.read(1))[0],
    "int16": lambda f: struct.unpack("<h", f.read(2))[0],
    "int32": lambda f: struct.unpack("<i", f.read(4))[0],
    "int64": lambda f: struct.unpack("<q", f.read(8))[0],
    "uint8": lambda f: struct.unpack("<B", f.read(1))[0],
    "uint16": lambda f: struct.unpack("<H", f.read(2))[0],
    "uint32": lambda f: struct.unpack("<I", f.read(4))[0],
    "uint64": lambda f: struct.unpack("<Q", f.read(8))[0],
    "float32": lambda f: struct.unpack("<f", f.read(4))[0],
    "float64": lambda f: struct.unpack("<d", f.read(8))[0],
    "float128": type_not_supported,
    "singleFloatWithUnit": type_not_supported,
    "doubleFloatWithUnit": type_not_supported,
    "extendedFloatWithUnit": type_not_supported,
    # Strings are length-prefixed with an int32 byte count:
    "str": lambda f: f.read(struct.unpack("<i", f.read(4))[0]),
    "bool": lambda f: struct.unpack("<?", f.read(1))[0],
    # Timestamps are (uint64 fractional seconds, int64 seconds since 1904):
    "datetime": lambda f: parse_time_stamp(
        struct.unpack("<Q", f.read(8))[0],
        struct.unpack("<q", f.read(8))[0],
    ),
    "raw": type_not_supported,
}
# Bit 5 of the lead-in table-of-contents bitmask; used to derive the
# 'decimated' flag when parsing the header.
DECIMATE_MASK = 0b00100000
# The TDMS lead-in header is a fixed 28 bytes.
LEAD_IN_LENGTH = 28
# Keys for the unpacked lead-in fields, in on-disk order.
FILEINFO_NAMES = (
    "file_tag",
    "toc",
    "version",
    "next_segment_offset",
    "raw_data_offset",
)
class TdmsReader(object):
    """A TDMS file reader object for reading properties and data.

    The 28-byte lead-in is parsed on construction; properties, chunk
    layout and the memory-mapped raw data arrays are read lazily on
    first access.
    """

    def __init__(self, filename):
        """Open *filename* and parse the TDMS lead-in.

        Raises TypeError if the file does not start with the 'TDSm' tag.
        """
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = op.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        # TODO: Error if file not big enough to hold header
        self._tdms_file = open(filename, "rb")
        # Read lead in (28 bytes):
        lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
        # lead_in is 28 bytes:
        # [string of length 4][int32][int32][int64][int64]
        fields = struct.unpack("<4siiQQ", lead_in)

        if fields[0].decode() != "TDSm":
            # BUG FIX: was `raise (TypeError, msg)`, a Python 2 idiom that
            # in Python 3 raises a bare "exceptions must derive from
            # BaseException" TypeError instead of this message. Also the
            # substring test `not in "TDSm"` was tightened to `!=`
            # (equivalent for the fixed 4-byte tag, but explicit).
            raise TypeError("Not a TDMS file (TDSm tag not found)")

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        # NOTE(review): 'decimated' is True when the DECIMATE bit is NOT
        # set in the toc bitmask -- preserved as-is; verify semantics
        # against the writing software.
        self.fileinfo["decimated"] = not bool(
            self.fileinfo["toc"] & DECIMATE_MASK
        )
        # Make offsets relative to beginning of file:
        self.fileinfo["next_segment_offset"] += LEAD_IN_LENGTH
        self.fileinfo["raw_data_offset"] += LEAD_IN_LENGTH
        self.fileinfo["file_size"] = op.getsize(self._tdms_file.name)

        # TODO: Validate lead in:
        # Clamp a next_segment_offset pointing past EOF (e.g. interrupted
        # write) instead of raising.
        if self.fileinfo["next_segment_offset"] > self.file_size:
            self.fileinfo["next_segment_offset"] = self.file_size
            # raise(ValueError, "Next Segment Offset too large in TDMS header")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._tdms_file.close()

    @property
    def channel_length(self):
        """Number of samples per channel in the first segment."""
        if self._properties is None:
            self.get_properties()

        rdo = num.int64(self.fileinfo["raw_data_offset"])
        nch = num.int64(self.n_channels)
        nso = self.fileinfo["next_segment_offset"]
        return num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize)

    @property
    def n_channels(self):
        """Number of data channels in the file."""
        if self._properties is None:
            self.get_properties()
        return self.fileinfo['n_channels']

    def get_properties(self, mapped=False):
        """
        Return a dictionary of properties. Read from file only if necessary.

        The *mapped* parameter is currently unused; it is kept for
        interface compatibility.
        """
        # Check if already hold properties in memory
        if self._properties is None:
            self._properties = self._read_properties()
        return self._properties

    def _read_property(self):
        """
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a list.
        """
        # Read length of object path:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack(
            "<{0}si".format(var), self._tdms_file.read(var + 4)
        )
        # Lookup function to read and parse property value based on type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 32:  # 0x20 (tdsTypeString): value arrives as bytes
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        """Read the properties from the file header and return them as a
        name -> value dict. Also records the end-of-properties offset and
        triggers reading of the chunk size."""
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects
        # BUG FIX: explicit little-endian '<i' (TDMS is little-endian);
        # the bare 'i' format used native byte order.
        self.fileinfo["n_channels"] = (
            struct.unpack("<i", self._tdms_file.read(4))[0] - 2
        )
        # Read length of object path:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        # skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]

        # loop through and read each property (drop the type element;
        # loop variable renamed so the builtin `type` is not shadowed)
        properties = [self._read_property() for _ in range(var)]
        properties = {prop: value for (prop, _type, value) in properties}

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        # TODO: Add number of channels to properties
        return properties

    def _read_chunk_size(self):
        """Read the data chunk size from the TDMS file header."""
        if self._end_of_properties_offset is None:
            self._read_properties()

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # skip over Group Information:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # skip over first channel path and length of index information:
        var = struct.unpack("<i", self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        self._data_type = TDS_DATA_TYPE.get(
            struct.unpack("<i", self._tdms_file.read(4))[0]
        )
        if self._data_type not in ("int16", "float32"):
            # BUG FIX: %-formatting instead of string concatenation, which
            # raised TypeError (masking this message) when the data type
            # code was unknown and self._data_type was None.
            raise Exception(
                "Unsupported TDMS data type: %s" % self._data_type)

        # Read Dimension of the raw data array (has to be 1):
        # dummy = struct.unpack("<i", self._tdms_file.read(4))[0]

        self._chunk_size = struct.unpack("<i", self._tdms_file.read(4))[0]

    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        """
        Get a block of data from the TDMS file.
        first_ch -- The first channel to load
        last_ch -- The last channel to load (inclusive)
        first_s -- The first sample to load
        last_s -- The last sample to load (inclusive)

        Returns a (samples, channels) array of the file's data type.
        """
        if self._raw_data is None:
            self._initialise_data()
        # Normalise inclusive channel/sample bounds into half-open ranges:
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= self.n_channels:
            last_ch = self.n_channels
        else:
            last_ch += 1
        if last_s is None or last_s > self._channel_length:
            last_s = self._channel_length
        else:
            last_s += 1
        nch = num.int64(max(last_ch - first_ch, 0))
        ns = num.int64(max(last_s - first_s, 0))

        # Allocate output container
        data = num.empty((ns, nch), dtype=num.dtype(self._data_type))
        if data.size == 0:
            return data

        # 1. Samples taken from the complete chunks of segment 1.
        first_blk = first_s // self._chunk_size
        last_blk = last_s // self._chunk_size
        last_full_blk = min(last_blk + 1, self._raw_data.shape[1])
        nchunk = min(
            max(last_full_blk - first_blk, 0), self._raw_data.shape[1]
        )
        first_s_1a = max(first_s - first_blk * self._chunk_size, 0)
        last_s_1a = min(
            last_s - first_blk * self._chunk_size, nchunk * self._chunk_size
        )
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)

        d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
        d.shape = (self._chunk_size * nchunk, nch)
        # NOTE(review): the original additionally called
        # d.reshape((..., nch), order="F") and discarded the result — a
        # no-op; that dead statement was removed.
        data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        # 2. Samples from the trailing partial chunk of segment 1.
        first_s_1b = max(
            first_s - self._raw_data.shape[1] * self._chunk_size, 0
        )
        last_s_1b = min(
            last_s - self._raw_data.shape[1] * self._chunk_size,
            self._raw_last_chunk.shape[0],
        )
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[
                first_s_1b:last_s_1b, first_ch:last_ch
            ]

        # 3. Samples from the complete chunks of segment 2 (if mapped).
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            first_blk_2 = max(first_s_2 // self._chunk_size, 0)
            last_blk_2 = max(last_s_2 // self._chunk_size, 0)
            last_full_blk_2 = min(last_blk_2 + 1, self._raw_data2.shape[1])
            nchunk_2 = min(
                max(last_full_blk_2 - first_blk_2, 0), self._raw_data2.shape[1]
            )
            first_s_2a = max(first_s_2 - first_blk_2 * self._chunk_size, 0)
            last_s_2a = min(
                last_s_2 - first_blk_2 * self._chunk_size,
                nchunk_2 * self._chunk_size,
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw_data2[
                    :, first_blk_2:last_full_blk_2, first_ch:last_ch
                ].reshape((self._chunk_size * nchunk_2, nch), order="F")[
                    first_s_2a:last_s_2a, :
                ]
        # 4. Samples from the trailing partial chunk of segment 2.
        if (
            first_s_2 > 0 or last_s_2 > 0
        ) and self._raw2_last_chunk is not None:
            first_s_2b = max(
                first_s_2 - self._raw_data2.shape[1] * self._chunk_size, 0
            )
            last_s_2b = min(
                last_s_2 - self._raw_data2.shape[1] * self._chunk_size,
                self._raw2_last_chunk.shape[0],
            )
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[
                    first_s_2b:last_s_2b, first_ch:last_ch
                ]
        # 5. `data` was filled in place; this guard is kept from the
        # original (unreachable after the early return above).
        if data.size == 0:
            data = data.reshape(0, 0)
        return data

    def _initialise_data(self):
        """Initialise the memory map for the data array.

        Maps segment 1 (complete chunks plus a trailing partial chunk)
        and, when present, segment 2 the same way.
        """
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        rdo = num.int64(self.fileinfo["raw_data_offset"])
        nch = num.int64(self.n_channels)

        # TODO: Support streaming file type?
        # TODO: Is this a valid calculation for ChannelLength?
        nso = self.fileinfo["next_segment_offset"]
        self._seg1_length = num.int64(
            (nso - rdo) / nch / num.dtype(self._data_type).itemsize
        )
        self._channel_length = self._seg1_length

        if self.fileinfo["decimated"]:
            n_complete_blk = num.int64(self._seg1_length / self._chunk_size)
            ax_ord = "C"
        else:
            n_complete_blk = 0
            ax_ord = "F"
        self._raw_data = num.ndarray(
            (n_complete_blk, nch, self._chunk_size),
            dtype=self._data_type,
            buffer=dmap,
            offset=rdo,
        )
        # Rotate the axes to [chunk_size, nblk, nch]
        self._raw_data = num.rollaxis(self._raw_data, 2)
        additional_samples = num.int64(
            self._seg1_length - n_complete_blk * self._chunk_size
        )
        additional_samples_offset = (
            rdo
            + n_complete_blk
            * nch
            * self._chunk_size
            * num.dtype(self._data_type).itemsize
        )
        self._raw_last_chunk = num.ndarray(
            (nch, additional_samples),
            dtype=self._data_type,
            buffer=dmap,
            offset=additional_samples_offset,
            order=ax_ord,
        )
        # Rotate the axes to [samples, nch]
        self._raw_last_chunk = num.rollaxis(self._raw_last_chunk, 1)

        if self.file_size == nso:
            self._seg2_length = 0
        else:
            # Read the second segment's lead-in offsets (skip tag/toc/ver):
            self._tdms_file.seek(nso + 12, 0)
            (seg2_nso, seg2_rdo) = struct.unpack(
                "<qq", self._tdms_file.read(2 * 8)
            )
            self._seg2_length = (
                (seg2_nso - seg2_rdo)
                / nch
                / num.dtype(self._data_type).itemsize
            )
            if self.fileinfo["decimated"]:
                n_complete_blk2 = num.int64(
                    self._seg2_length / self._chunk_size)
            else:
                n_complete_blk2 = num.int64(0)
            self._raw_data2 = num.ndarray(
                (n_complete_blk2, nch, self._chunk_size),
                dtype=self._data_type,
                buffer=dmap,
                offset=(nso + LEAD_IN_LENGTH + seg2_rdo),
            )
            self._raw_data2 = num.rollaxis(self._raw_data2, 2)
            additional_samples = num.int64(
                self._seg2_length - n_complete_blk2 * self._chunk_size
            )
            additional_samples_offset = (
                nso
                + LEAD_IN_LENGTH
                + seg2_rdo
                + n_complete_blk2
                * nch
                * self._chunk_size
                * num.dtype(self._data_type).itemsize
            )
            self._raw2_last_chunk = num.ndarray(
                (nch, additional_samples),
                dtype=self._data_type,
                buffer=dmap,
                offset=additional_samples_offset,
                order=ax_ord,
            )
            # Rotate the axes to [samples, nch]
            self._raw2_last_chunk = num.rollaxis(self._raw2_last_chunk, 1)

            if self._raw_data2.size != 0 or self._raw2_last_chunk.size != 0:
                pass
                # raise Exception('Second segment contains some data, \
                #                 not currently supported')
            self._channel_length = self._seg1_length + self._seg2_length
        # else:
        #     print "Not decimated"
        #     raise Exception('Reading file with decimated flag not set is not'
        #                     ' supported yet')
# Mapping of normalized metadata keys to the raw TDMS property names.
# Entries mapped to None ('channel', 'unit') have no TDMS counterpart and
# are filled in elsewhere.
# NOTE(review): the 'SystemInfomation' spelling matches the vendor's
# on-disk property names -- do not "fix" it.
META_KEYS = {
    'measure_length': 'MeasureLength[m]',
    'start_position': 'StartPosition[m]',
    'spatial_resolution': 'SpatialResolution[m]',
    'fibre_index': 'FibreIndex',
    'unit_calibration': 'Unit Calibration (nm)',
    'start_distance': 'Start Distance (m)',
    'stop_distance': 'Stop Distance (m)',
    'normalization': 'Normalization',
    'decimation_filter': 'Decimation Filter',
    'gauge_length': 'GaugeLength',
    'norm_offset': 'Norm Offset',
    'source_mode': 'Source Mode',
    'time_decimation': 'Time Decimation',
    'zero_offset': 'Zero Offset (m)',
    'p_parameter': 'P',
    'p_coefficients': 'P Coefficients',
    'idas_version': 'iDASVersion',
    'precice_sampling_freq': 'Precise Sampling Frequency (Hz)',
    'receiver_gain': 'Receiver Gain',
    'continuous_mode': 'Continuous Mode',
    'geo_lat': 'SystemInfomation.GPS.Latitude',
    'geo_lon': 'SystemInfomation.GPS.Longitude',
    'geo_elevation': 'SystemInfomation.GPS.Altitude',
    'channel': None,
    'unit': None
}
def get_meta(tdms_properties):
    """Derive ``(deltat, tmin, fibre_meta)`` from raw TDMS properties.

    tdms_properties -- property dict as returned by
        TdmsReader.get_properties().

    Returns the sampling interval [s], the start time as a POSIX
    timestamp, and a dict of fibre metadata remapped through META_KEYS
    (missing entries default to -1).
    """
    prop = tdms_properties

    deltat = 1. / prop['SamplingFrequency[Hz]']
    tmin = prop['GPSTimeStamp'].timestamp()

    fibre_meta = {key: prop.get(key_map, -1)
                  for key, key_map in META_KEYS.items()
                  if key_map is not None}

    # Coefficient lists are stored as tab- or semicolon-separated strings.
    coeff = fibre_meta['p_coefficients']
    try:
        coeff = tuple(map(float, coeff.split('\t')))
    except ValueError:
        coeff = tuple(map(float, coeff.split(';')))
    # BUG FIX: the parsed coefficients were never written back; the raw
    # string stayed in 'p_coefficients'.
    fibre_meta['p_coefficients'] = coeff

    gain = fibre_meta['receiver_gain']
    try:
        gain = tuple(map(float, gain.split('\t')))
    except ValueError:
        gain = tuple(map(float, gain.split(';')))
    # BUG FIX: original assigned `coeff` here, discarding the parsed gain.
    fibre_meta['receiver_gain'] = gain

    fibre_meta['unit'] = 'radians'

    return deltat, tmin, fibre_meta
def iload(filename, load_data=True):
    """Yield one pyrocko Trace per channel of the TDMS file *filename*.

    load_data -- when False, traces carry no sample data (ydata is None).
    """
    reader = TdmsReader(filename)
    deltat, tmin, meta = get_meta(reader.get_properties())

    if load_data:
        data = reader.get_data().T.copy()
    else:
        data = None

    # Same sample count for every channel; compute the end time once.
    nsamples = reader.channel_length
    tmax = tmin + (nsamples - 1) * deltat

    for icha in range(reader.n_channels):
        assert icha < 99999

        channel_meta = dict(meta)
        channel_meta['channel'] = icha

        tr = trace.Trace(
            network='DA',
            station='%05i' % icha,
            ydata=None,
            deltat=deltat,
            tmin=tmin,
            tmax=tmax,
            meta=channel_meta)

        if data is not None:
            tr.set_ydata(data[icha])

        yield tr
def detect(first512):
    """Return True if *first512* (bytes) looks like the start of a TDMS file.

    The TDMS lead-in begins with the fixed 4-byte tag b'TDSm'; the fifth
    byte is the first byte of the table-of-contents bitmask and is not
    constant. BUG FIX: the original matched the literal b'TDSm.' (tag plus
    a literal '.' byte), which rejected valid TDMS files; this also makes
    detect() consistent with the tag check in TdmsReader.__init__.
    """
    return first512.startswith(b'TDSm')