1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
5# python 2/3
6from __future__ import absolute_import
8import os
9import math
10import logging
12import numpy as num
14from pyrocko import trace, util, plot
from pyrocko.guts import Object, Int, List, String, Timestamp
17from . import io_common
logger = logging.getLogger(__name__)

N_GPS_TAGS_WANTED = 200  # must match definition in datacube_ext.c

# When True, compensate the constant sub-sample time shift introduced by the
# Cube's ADC (applied to the control point times in analyse_gps_tags).
APPLY_SUBSAMPLE_SHIFT_CORRECTION = True
def color(c):
    '''Translate a color name to an RGB tuple with components in [0., 1.].'''
    rgb255 = plot.color(c)
    return tuple(component / 255. for component in rgb255)
class DataCubeError(io_common.FileLoadError):
    '''Raised when reading of a DataCube file fails.'''
    pass
class ControlPointError(Exception):
    '''Raised when no reliable time-correction control point can be made.'''
    pass
def make_control_point(ipos_block, t_block, tref, deltat):
    '''Fit a clock-drift control point to one block of GPS tags.

    Fits a straight line to the reduced GPS times of the block (GPS time
    minus nominal sample time), using two rounds of interquartile outlier
    rejection, and evaluates the fit at the center tag of the block.

    :param ipos_block: sample indices of the GPS tags in the block
    :param t_block: GPS times of the tags [s]
    :param tref: reference time subtracted for numerical stability [s]
    :param deltat: nominal sampling interval [s]
    :returns: tuple ``(ic, tc)`` with the center sample index and the
        corrected absolute time at that sample
    :raises ControlPointError: if the fit uncertainties exceed fixed limits
    '''

    # reduce time (no drift would mean straight line)
    tred = (t_block - tref) - ipos_block*deltat

    # first round, remove outliers: keep only values inside the
    # interquartile range
    q25, q75 = num.percentile(tred, (25., 75.))
    iok = num.logical_and(q25 <= tred, tred <= q75)

    # detrend
    slope, offset = num.polyfit(ipos_block[iok], tred[iok], 1)
    tred2 = tred - (offset + slope * ipos_block)

    # second round, remove outliers based on detrended tred, refit
    q25, q75 = num.percentile(tred2, (25., 75.))
    iok = num.logical_and(q25 <= tred2, tred2 <= q75)

    # shift origin to the first kept sample index (numerical stability of
    # the covariance estimate)
    x = ipos_block[iok].copy()
    ipos0 = x[0]
    x -= ipos0
    y = tred[iok].copy()
    (slope, offset), cov = num.polyfit(x, y, 1, cov=True)

    slope_err, offset_err = num.sqrt(num.diag(cov))

    # empirical acceptance thresholds for the fit uncertainties
    slope_err_limit = 1.0e-10
    offset_err_limit = 5.0e-3

    if slope_err > slope_err_limit:
        raise ControlPointError('slope error too large')

    if offset_err > offset_err_limit:
        raise ControlPointError('offset error too large')

    # evaluate the fit at the center tag of the block, then convert the
    # reduced time back to absolute time
    ic = ipos_block[ipos_block.size//2]
    tc = offset + slope * (ic - ipos0)

    return ic, tc + ic * deltat + tref
def analyse_gps_tags(header, gps_tags, offset, nsamples):
    '''Derive time-correction control points from the raw GPS tags.

    :param header: DataCube header dict; needs ``S_RATE``, and in the
        fallback path also ``S_DATE``, ``S_TIME`` and ``DAT_NO``
    :param gps_tags: tuple of arrays ``(ipos, t, fix, nsvs)``
    :param offset: sample index offset of this file within the recording
    :param nsamples: number of samples in the file
    :returns: tuple ``(tmin, tmax, icontrol, tcontrol, ok)``. When no
        usable control points can be derived, ``icontrol``, ``tcontrol``
        and ``ok`` are ``None`` and ``tmin``/``tmax`` are guessed from the
        file header.
    '''

    ipos, t, fix, nsvs = gps_tags
    deltat = 1.0 / int(header['S_RATE'])

    # nominal (quartz clock) time of each GPS tag
    tquartz = offset + ipos * deltat

    # offset between GPS time and quartz time
    toff = t - tquartz
    toff_median = num.median(toff)

    n = t.size

    # local drift rate between consecutive tags
    dtdt = (t[1:n] - t[0:n-1]) / (tquartz[1:n] - tquartz[0:n-1])

    # reject tags deviating more than 10 s from the median offset
    ok = abs(toff_median - toff) < 10.

    # reject tags where the local drift rate deviates from 1 by more
    # than 1e-5
    xok = num.abs(dtdt - 1.0) < 0.00001

    if ok.size >= 1:
        # a tag is kept only if both adjacent drift-rate estimates are
        # plausible; the first and last tag have only one neighbor and are
        # always dropped
        ok[0] = False
        ok[1:n] &= xok
        ok[0:n-1] &= xok
        ok[n-1] = False

    ipos = ipos[ok]
    t = t[ok]
    fix = fix[ok]
    nsvs = nsvs[ok]

    blocksize = N_GPS_TAGS_WANTED // 2

    try:
        if ipos.size < blocksize:
            raise ControlPointError(
                'could not safely determine time corrections from gps')

        # fit one control point per block of tags; blocks with unstable
        # fits are silently skipped
        j = 0
        control_points = []
        tref = num.median(t - ipos*deltat)
        while j < ipos.size - blocksize:
            ipos_block = ipos[j:j+blocksize]
            t_block = t[j:j+blocksize]
            try:
                ic, tc = make_control_point(ipos_block, t_block, tref, deltat)
                control_points.append((ic, tc))
            except ControlPointError:
                pass

            j += blocksize

        # always try a control point for the trailing block as well
        ipos_last = ipos[-blocksize:]
        t_last = t[-blocksize:]
        try:
            ic, tc = make_control_point(ipos_last, t_last, tref, deltat)
            control_points.append((ic, tc))
        except ControlPointError:
            pass

        if len(control_points) < 2:
            raise ControlPointError(
                'could not safely determine time corrections from gps')

        i0, t0 = control_points[0]
        i1, t1 = control_points[1]
        i2, t2 = control_points[-2]
        i3, t3 = control_points[-1]
        if len(control_points) == 2:
            # too few points to estimate a drift; assume nominal deltat
            tmin = t0 - i0 * deltat - offset * deltat
            tmax = t3 + (nsamples - i3 - 1) * deltat
        else:
            # extrapolate to the first/last sample using the median drift
            icontrol = num.array(
                [x[0] for x in control_points], dtype=num.int64)
            tcontrol = num.array(
                [x[1] for x in control_points], dtype=float)
            # robust against steps:
            slope = num.median(
                (tcontrol[1:] - tcontrol[:-1])
                / (icontrol[1:] - icontrol[:-1]))

            tmin = t0 + (offset - i0) * slope
            tmax = t2 + (offset + nsamples - 1 - i2) * slope

        # make sure the control points span the whole file
        if offset < i0:
            control_points[0:0] = [(offset, tmin)]

        if offset + nsamples - 1 > i3:
            control_points.append((offset + nsamples - 1, tmax))

        icontrol = num.array([x[0] for x in control_points], dtype=num.int64)
        tcontrol = num.array([x[1] for x in control_points], dtype=float)

        # corrected 2021-10-26: This sub-sample time shift introduced by the
        # Cube's ADC was previously not recognized.
        if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
            tcontrol += 0.199 * deltat + 0.0003

        return tmin, tmax, icontrol, tcontrol, ok

    except ControlPointError:
        # fallback: no usable GPS information, guess times from the header
        tmin = util.str_to_time(header['S_DATE'] + header['S_TIME'],
                                format='%y/%m/%d%H:%M:%S')

        idat = int(header['DAT_NO'])
        if idat == 0:
            tmin = tmin + util.gps_utc_offset(tmin)
        else:
            # follow-up files: header carries the recording start date, the
            # file number gives the day
            tmin = util.day_start(tmin + idat * 24.*3600.) \
                + util.gps_utc_offset(tmin)

        tmax = tmin + (nsamples - 1) * deltat
        icontrol, tcontrol = None, None
        return tmin, tmax, icontrol, tcontrol, None
def plot_gnss_location_timeline(fns):
    '''Plot GNSS position and satellite count over time for DataCube files.

    One subplot each for northing, easting, elevation and number of
    satellites. Positions are shown relative to the per-file median
    coordinates; times relative to the day start of the first usable file.

    :param fns: sequence of DataCube file paths
    '''
    from matplotlib import pyplot as plt
    from pyrocko.orthodrome import latlon_to_ne_numpy
    not_ = num.logical_not
    h = 3600.

    fig = plt.figure()

    axes = []
    for i in range(4):
        axes.append(
            fig.add_subplot(4, 1, i+1, sharex=axes[-1] if axes else None))

    # alternating background shading to distinguish consecutive files
    background_colors = [
        color('aluminium1'),
        color('aluminium2')]

    tref = None
    for ifn, fn in enumerate(fns):
        header, gps_tags, nsamples = get_time_infos(fn)
        _, t, fix, nsvs, lats, lons, elevations, _ = gps_tags

        fix = fix.astype(bool)

        if t.size < 2:
            logger.warning('Need at least 2 gps tags for plotting: %s' % fn)
            # bug fix: previously execution fell through after the warning
            # and crashed below (e.g. indexing t[0] with t.size == 0)
            continue

        if tref is None:
            tref = util.day_start(t[0])

        lat, lon, elevation = coordinates_from_gps(gps_tags)

        norths, easts = latlon_to_ne_numpy(lat, lon, lats, lons)

        for ax, data in zip(axes, (norths, easts, elevations, nsvs)):
            tspan = t[num.array([0, -1])]

            ax.axvspan(*((tspan - tref) / h), color=background_colors[ifn % 2])
            med = num.median(data)
            ax.plot(
                (tspan - tref) / h,
                [med, med],
                ls='--',
                c='k',
                lw=3,
                alpha=0.5)

            # tags without GPS fix in red, tags with fix in grey
            ax.plot(
                (t[not_(fix)] - tref) / h, data[not_(fix)], 'o',
                ms=1.5,
                mew=0,
                color=color('scarletred2'))

            ax.plot(
                (t[fix] - tref) / h, data[fix], 'o',
                ms=1.5,
                mew=0,
                color=color('aluminium6'))

    for ax in axes:
        ax.grid(alpha=.3)

    ax_lat, ax_lon, ax_elev, ax_nsv = axes

    ax_lat.set_ylabel('Northing [m]')
    ax_lon.set_ylabel('Easting [m]')
    ax_elev.set_ylabel('Elevation [m]')
    ax_nsv.set_ylabel('Number of Satellites')

    ax_lat.get_xaxis().set_tick_params(labelbottom=False)
    ax_lon.get_xaxis().set_tick_params(labelbottom=False)
    ax_nsv.set_xlabel(
        'Hours after %s' % util.time_to_str(tref, format='%Y-%m-%d'))

    # NOTE: title shows the coordinates of the last plotted file
    fig.suptitle(
        u'Lat: %.5f\u00b0 Lon: %.5f\u00b0 Elevation: %g m' % (
            lat, lon, elevation))

    plt.show()
def coordinates_from_gps(gps_tags):
    '''Median latitude, longitude and elevation from raw GPS tags.'''
    _, _, _, _, lats, lons, elevations, _ = gps_tags
    return (num.median(lats), num.median(lons), num.median(elevations))
def extract_stations(fns):
    '''Write station metadata for the given DataCube files to stdout.

    The station code is taken from the file name extension; coordinates
    are the median GPS position of each file.
    '''
    import io
    import sys
    from pyrocko.model import Station
    from pyrocko.guts import dump_all

    stations = {}
    for path in fns:
        # the Cube naming scheme puts the station code into the extension
        sta_name = os.path.splitext(path)[1].lstrip('.')
        if sta_name in stations:
            logger.warning('Cube %s already in list!', sta_name)
            continue

        _, gps_tags, _ = get_time_infos(path)
        lat, lon, elevation = coordinates_from_gps(gps_tags)

        stations[sta_name] = Station(
            network='',
            station=sta_name,
            name=sta_name,
            location='',
            lat=lat,
            lon=lon,
            elevation=elevation)

    buf = io.BytesIO()
    dump_all(stations.values(), stream=buf)
    sys.stdout.write(buf.getvalue().decode())
def plot_timeline(fns):
    '''Plot GPS time corrections of DataCube recordings.

    *fns* may be a list of file names, a single file name, or a directory
    name (all contained files are then treated as one recording).
    '''
    from matplotlib import pyplot as plt

    fig = plt.figure()
    axes = fig.gca()

    h = 3600.  # seconds per hour, for axis scaling

    if isinstance(fns, str):
        fn = fns
        if os.path.isdir(fn):
            # directory given: use all contained files as one recording
            fns = [
                os.path.join(fn, entry) for entry in sorted(os.listdir(fn))]

            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_timing_context(fns)

        else:
            # single file: also pull in neighboring files of the recording
            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_extended_timing_context(fn)

    else:
        ipos, t, fix, nsvs, header, offset, nsamples = \
            get_timing_context(fns)

    deltat = 1.0 / int(header['S_RATE'])

    # reference time: median GPS time at sample zero, rounded to a
    # multiple of deltat
    tref = num.median(t - ipos * deltat)
    tref = round(tref / deltat) * deltat

    # x: uncorrected (quartz) time, y: time correction of each GPS tag
    x = ipos*deltat
    y = (t - tref) - ipos*deltat

    bfix = fix != 0
    bnofix = fix == 0

    tmin, tmax, icontrol, tcontrol, ok = analyse_gps_tags(
        header, (ipos, t, fix, nsvs), offset, nsamples)

    # NOTE(review): icontrol/tcontrol are None when analyse_gps_tags falls
    # back to header times; the code below would fail in that case — confirm.

    la = num.logical_and
    nok = num.logical_not(ok)

    # accepted tags in color, rejected in grey; '+' = GPS fix, 'x' = no fix
    axes.plot(
        x[la(bfix, ok)]/h, y[la(bfix, ok)], '+',
        ms=5, color=color('chameleon3'))
    axes.plot(
        x[la(bfix, nok)]/h, y[la(bfix, nok)], '+',
        ms=5, color=color('aluminium4'))

    axes.plot(
        x[la(bnofix, ok)]/h, y[la(bnofix, ok)], 'x',
        ms=5, color=color('chocolate3'))
    axes.plot(
        x[la(bnofix, nok)]/h, y[la(bnofix, nok)], 'x',
        ms=5, color=color('aluminium4'))

    # the derived time-correction control points
    tred = tcontrol - icontrol*deltat - tref
    axes.plot(icontrol*deltat/h, tred, color=color('aluminium6'))
    axes.plot(icontrol*deltat/h, tred, 'o', ms=5, color=color('aluminium6'))

    ymin = (math.floor(tred.min() / deltat)-1) * deltat
    ymax = (math.ceil(tred.max() / deltat)+1) * deltat

    # axes.set_ylim(ymin, ymax)
    if ymax - ymin < 1000 * deltat:
        # horizontal grid lines at multiples of deltat
        ygrid = math.floor(tred.min() / deltat) * deltat
        while ygrid < ymax:
            axes.axhline(ygrid, color=color('aluminium4'), alpha=0.3)
            ygrid += deltat

    xmin = icontrol[0]*deltat/h
    xmax = icontrol[-1]*deltat/h
    xsize = xmax - xmin
    xmin -= xsize * 0.1
    xmax += xsize * 0.1
    axes.set_xlim(xmin, xmax)

    axes.set_ylim(ymin, ymax)

    axes.set_xlabel('Uncorrected (quartz) time [h]')
    axes.set_ylabel('Relative time correction [s]')

    plt.show()
# Cache of DirContext objects, keyed by absolute directory path.
g_dir_contexts = {}
class DirContextEntry(Object):
    '''Meta information about one DataCube file in a directory.'''

    path = String.T()       # absolute file path
    tstart = Timestamp.T()  # recording start time, parsed from the header
    ifile = Int.T()         # file number within the recording (DAT_NO)
class DirContext(Object):
    '''Overview of the DataCube files contained in one directory.'''

    path = String.T()      # absolute directory path
    mtime = Timestamp.T()  # newest mtime of the directory and its files
    # bug fix: was `DirContextEntry.T()` (a single entry), but a list is
    # stored and iterated — must be declared as a list of entries
    entries = List.T(DirContextEntry.T())

    def get_entry(self, fn):
        '''Return the entry whose path matches *fn*.

        :raises Exception: if no entry with that path exists
        '''
        path = os.path.abspath(fn)
        for entry in self.entries:
            if entry.path == path:
                return entry

        raise Exception('entry not found')

    def iter_entries(self, fn, step=1):
        '''Iterate over entries neighboring the entry of file *fn*.

        Walks in direction *step* (+1 or -1) over entries with the same
        recording start time and consecutive file numbers, stopping at
        the first gap.
        '''
        current = self.get_entry(fn)
        by_ifile = dict(
            (entry.ifile, entry) for entry in self.entries
            if entry.tstart == current.tstart)

        icurrent = current.ifile
        while True:
            icurrent += step
            try:
                yield by_ifile[icurrent]

            except KeyError:
                break
def context(fn):
    '''Get a (cached) DirContext for the directory containing *fn*.

    Scans the directory for DataCube files and collects their start times
    and file numbers. The result is cached and rebuilt whenever the
    modification time of the directory or of any contained file changes.
    '''
    from pyrocko import datacube_ext

    dpath = os.path.dirname(os.path.abspath(fn))
    mtimes = [os.stat(dpath)[8]]  # stat index 8: st_mtime

    # dentries already contain full (absolute) paths
    dentries = sorted([os.path.join(dpath, f) for f in os.listdir(dpath)
                       if os.path.isfile(os.path.join(dpath, f))])
    for dentry in dentries:
        mtimes.append(os.stat(dentry)[8])

    mtime = float(max(mtimes))

    if dpath in g_dir_contexts:
        dir_context = g_dir_contexts[dpath]
        if dir_context.mtime == mtime:
            return dir_context

        del g_dir_contexts[dpath]

    entries = []
    for dentry in dentries:
        if not os.path.isfile(dentry):
            continue

        # cheap pre-check on the first 512 bytes before full parsing
        with open(dentry, 'rb') as f:
            first512 = f.read(512)
            if not detect(first512):
                continue

        with open(dentry, 'rb') as f:
            try:
                header, data_arrays, gps_tags, nsamples, _ = \
                    datacube_ext.load(f.fileno(), 3, 0, -1, None)

            except datacube_ext.DataCubeError as e:
                e = DataCubeError(str(e))
                # bug fix: report the file actually being read, not the
                # file the context was requested for
                e.set_context('filename', dentry)
                raise e

        header = dict(header)
        entries.append(DirContextEntry(
            path=os.path.abspath(dentry),
            tstart=util.str_to_time(
                '20' + header['S_DATE'] + ' ' + header['S_TIME'],
                format='%Y/%m/%d %H:%M:%S'),
            ifile=int(header['DAT_NO'])))

    dir_context = DirContext(mtime=mtime, path=dpath, entries=entries)

    # bug fix: the freshly built context was never stored, so the cache
    # lookup above could never hit
    g_dir_contexts[dpath] = dir_context

    return dir_context
def get_time_infos(fn):
    '''Read header dict, GPS tags and sample count of a DataCube file.

    Only meta information is loaded (load flag 1), no waveform samples.
    '''
    from pyrocko import datacube_ext

    with open(fn, 'rb') as f:
        try:
            header, _, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), 1, 0, -1, None)

        except datacube_ext.DataCubeError as exc:
            wrapped = DataCubeError(str(exc))
            wrapped.set_context('filename', fn)
            raise wrapped

    return dict(header), gps_tags, nsamples
def get_timing_context(fns):
    '''Aggregate GPS timing information over consecutive DataCube files.

    Sample indices are shifted in place so that they refer to one
    continuous recording. Returns the header of the last file, an offset
    of 0 and the total number of samples.
    '''
    collected = ([], [], [], [])
    total = 0
    for path in fns:
        header, gps_tags, nsamples = get_time_infos(path)

        # shift sample indices into the global frame (in-place on the
        # array, so the collected reference is shifted too)
        ipos = gps_tags[0]
        ipos += total

        for bucket, values in zip(collected, gps_tags):
            bucket.append(values)

        total += nsamples

    ipos, t, fix, nsvs = (num.concatenate(parts) for parts in collected)
    return ipos, t, fix, nsvs, header, 0, total
def get_extended_timing_context(fn):
    '''Get GPS timing information for *fn*, extended by neighboring files.

    If the file itself contains no GPS fixes, tags of consecutive files of
    the same recording (found via :py:func:`context`) are aggregated,
    walking forward and backward until a file with fixes is reached.
    '''
    c = context(fn)

    header, gps_tags, nsamples_base = get_time_infos(fn)

    ioff = 0
    aggregated = [gps_tags]

    nsamples_total = nsamples_base

    if num.sum(gps_tags[2]) == 0:
        # no GPS fixes in this file: borrow tags from neighboring files

        ioff = nsamples_base
        for entry in c.iter_entries(fn, 1):

            _, gps_tags, nsamples = get_time_infos(entry.path)

            # shift sample indices into the base file's frame (in-place)
            ipos = gps_tags[0]
            ipos += ioff

            aggregated.append(gps_tags)
            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

            ioff += nsamples

        ioff = 0
        for entry in c.iter_entries(fn, -1):

            _, gps_tags, nsamples = get_time_infos(entry.path)

            # preceding file: negative shift relative to the base file
            ioff -= nsamples

            ipos = gps_tags[0]
            ipos += ioff

            aggregated[0:0] = [gps_tags]

            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

    ipos, t, fix, nsvs = [num.concatenate(x) for x in zip(*aggregated)][:4]

    # NOTE(review): deliberately returns offset 0 and the base file's sample
    # count rather than the aggregated totals (see the alternative below) —
    # confirm before changing.
#   return ipos, t, fix, nsvs, header, ioff, nsamples_total
    return ipos, t, fix, nsvs, header, 0, nsamples_base
def iload(fn, load_data=True, interpolation='sinc'):
    '''Load traces from a DataCube file (generator).

    :param fn: path to the DataCube file
    :param load_data: if ``False``, only meta information is read and the
        yielded traces carry no samples
    :param interpolation: ``'sinc'`` to correct clock drift by
        interpolation onto a regular time axis, ``'off'`` for no
        correction
    :yields: one trace per channel, split at GPS leap seconds
    :raises DataCubeError: if the file cannot be read
    :raises NotImplementedError: for an unknown interpolation method
    '''
    from pyrocko import datacube_ext
    from pyrocko import signal_ext

    if interpolation not in ('sinc', 'off'):
        raise NotImplementedError(
            'no such interpolation method: %s' % interpolation)

    with open(fn, 'rb') as f:
        if load_data:
            loadflag = 2
        else:
            loadflag = 1

        try:
            header, data_arrays, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), loadflag, 0, -1, None)

        except datacube_ext.DataCubeError as e:
            e = DataCubeError(str(e))
            e.set_context('filename', fn)
            raise e

    header = dict(header)
    deltat = 1.0 / int(header['S_RATE'])
    nchannels = int(header['CH_NUM'])

    ipos, t, fix, nsvs, header_, offset_, nsamples_ = \
        get_extended_timing_context(fn)

    tmin, tmax, icontrol, tcontrol, _ = analyse_gps_tags(
        header_, (ipos, t, fix, nsvs), offset_, nsamples_)

    if icontrol is None:
        # fixed: logger.warn is a deprecated alias of logger.warning
        logger.warning(
            'No usable GPS timestamps found. Using datacube header '
            'information to guess time. (file: "%s")' % fn)

    tmin_ip = round(tmin / deltat) * deltat
    if interpolation != 'off':
        tmax_ip = round(tmax / deltat) * deltat
    else:
        tmax_ip = tmin_ip + (nsamples-1) * deltat

    nsamples_ip = int(round((tmax_ip - tmin_ip)/deltat)) + 1
    # to prevent problems with rounding errors:
    tmax_ip = tmin_ip + (nsamples_ip-1) * deltat

    # leap second times converted to the GPS time scale
    leaps = num.array(
        [x[0] + util.gps_utc_offset(x[0]) for x in util.read_leap_seconds2()],
        dtype=float)

    if load_data and icontrol is not None:
        ncontrol_this = num.sum(
            num.logical_and(0 <= icontrol, icontrol < nsamples))

        if ncontrol_this <= 1:
            # fixed: logger.warn is a deprecated alias of logger.warning
            logger.warning(
                'Extrapolating GPS time information from directory context '
                '(insufficient number of GPS timestamps in file: "%s").' % fn)

    for i in range(nchannels):
        if load_data:
            arr = data_arrays[i]
            assert arr.size == nsamples

            if interpolation == 'sinc' and icontrol is not None:

                # resample onto a regular time axis using the control
                # points to remove clock drift
                ydata = num.empty(nsamples_ip, dtype=float)
                try:
                    signal_ext.antidrift(
                        icontrol, tcontrol,
                        arr.astype(float), tmin_ip, deltat, ydata)

                except signal_ext.Error as e:
                    e = DataCubeError(str(e))
                    e.set_context('filename', fn)
                    e.set_context('n_control_points', icontrol.size)
                    e.set_context('n_samples_raw', arr.size)
                    e.set_context('n_samples_ip', ydata.size)
                    e.set_context('tmin_ip', util.time_to_str(tmin_ip))
                    raise e

                ydata = num.round(ydata).astype(arr.dtype)
            else:
                ydata = arr

            tr_tmin = tmin_ip
            tr_tmax = None
        else:
            ydata = None
            tr_tmin = tmin_ip
            tr_tmax = tmax_ip

        tr = trace.Trace('', header['DEV_NO'], '', 'p%i' % i, deltat=deltat,
                         ydata=ydata, tmin=tr_tmin, tmax=tr_tmax, meta=header)

        # split the trace at a leap second, if one falls inside its span
        bleaps = num.logical_and(tmin_ip <= leaps, leaps < tmax_ip)

        if num.any(bleaps):
            assert num.sum(bleaps) == 1
            tcut = leaps[bleaps][0]

            for tmin_cut, tmax_cut in [
                    (tr.tmin, tcut), (tcut, tr.tmax+tr.deltat)]:

                try:
                    tr_cut = tr.chop(tmin_cut, tmax_cut, inplace=False)
                    tr_cut.shift(
                        util.utc_gps_offset(0.5*(tr_cut.tmin+tr_cut.tmax)))
                    yield tr_cut

                except trace.NoData:
                    pass

        else:
            tr.shift(util.utc_gps_offset(0.5*(tr.tmin+tr.tmax)))
            yield tr
# Header field names, grouped by the Python type of their values.
header_keys = {
    str: b'GIPP_V DEV_NO E_NAME GPS_PO S_TIME S_DATE DAT_NO'.split(),
    int: b'''P_AMPL CH_NUM S_RATE D_FILT C_MODE A_CHOP F_TIME GPS_TI GPS_OF
        A_FILT A_PHAS GPS_ON ACQ_ON V_TCXO D_VOLT E_VOLT'''.split()}

all_header_keys = header_keys[str] + header_keys[int]


def detect(first512):
    '''Check whether a buffer looks like the start of a DataCube file.

    :param first512: the first 512 bytes of a candidate file
    :returns: ``True`` if the buffer appears to contain a DataCube header
    '''
    if len(first512) < 512:
        return False

    # first byte must carry 0xf in its high nibble
    if ord(first512[0:1]) >> 4 != 15:
        return False

    # the header section ends at the first 0x80 byte (or buffer end)
    end = first512.find(b'\x80')
    if end == -1:
        end = len(first512)

    payload = first512[1:end].replace(b'\xf0', b'')
    for separator in (b';', b'='):
        payload = payload.replace(separator, b' ')

    tokens = payload.split(b' ')
    return any(key in tokens for key in all_header_keys)
if __name__ == '__main__':
    # Minimal command line interface to the plotting/extraction helpers.
    import argparse
    parser = argparse.ArgumentParser(description='Datacube reader')

    parser.add_argument(
        'action', choices=['timeline', 'gnss', 'stations'],
        help='Action')
    parser.add_argument(
        'files', nargs='+')

    args = parser.parse_args()
    if args.action == 'timeline':
        plot_timeline(args.files)

    elif args.action == 'gnss':
        plot_gnss_location_timeline(args.files)

    elif args.action == 'stations':
        extract_stations(args.files)