1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
5# python 2/3
6from __future__ import absolute_import
8import os
9import math
10import logging
12import numpy as num
14from pyrocko import trace, util, plot
15from pyrocko.guts import Object, Int, String, Timestamp
17from . import io_common
logger = logging.getLogger(__name__)

# Number of GPS tags the low-level reader tries to collect per file.
N_GPS_TAGS_WANTED = 200  # must match definition in datacube_ext.c

# When enabled, a constant sub-sample time shift introduced by the Cube's
# ADC is compensated when building time control points (see
# analyse_gps_tags and plot_timeline).
APPLY_SUBSAMPLE_SHIFT_CORRECTION = True
def color(c):
    '''
    Resolve color name *c* and rescale its RGB components to [0.0, 1.0].
    '''
    rgb255 = plot.color(c)
    return tuple(channel / 255. for channel in rgb255)
class DataCubeError(io_common.FileLoadError):
    '''Raised when a DataCube file cannot be loaded.'''
    pass
class ControlPointError(Exception):
    '''Raised when no reliable time control point can be constructed.'''
    pass
def make_control_point(ipos_block, t_block, tref, deltat):
    '''
    Fit a single (sample index, time) control point to a block of GPS tags.

    A straight line is fitted to the drift-reduced tag times (two rounds of
    inter-quartile outlier rejection, then a least-squares fit with
    covariance estimate).

    :param ipos_block: sample indices of the GPS tags in the block
    :param t_block: GPS times of the tags [s]
    :param tref: reference time subtracted to keep the fit well-conditioned
    :param deltat: nominal sampling interval [s]

    :returns: tuple ``(ic, tc)`` — center sample index of the block and the
        corrected absolute time at that sample.

    :raises ControlPointError: if the uncertainty of the fitted slope or
        offset exceeds its limit.
    '''

    # reduce time (no drift would mean straight line)
    tred = (t_block - tref) - ipos_block*deltat

    # first round, remove outliers (keep inter-quartile range)
    q25, q75 = num.percentile(tred, (25., 75.))
    iok = num.logical_and(q25 <= tred, tred <= q75)

    # detrend
    slope, offset = num.polyfit(ipos_block[iok], tred[iok], 1)
    tred2 = tred - (offset + slope * ipos_block)

    # second round, remove outliers based on detrended tred, refit
    q25, q75 = num.percentile(tred2, (25., 75.))
    iok = num.logical_and(q25 <= tred2, tred2 <= q75)

    x = ipos_block[iok].copy()
    ipos0 = x[0]
    x -= ipos0
    y = tred[iok].copy()
    (slope, offset), cov = num.polyfit(x, y, 1, cov=True)

    slope_err, offset_err = num.sqrt(num.diag(cov))

    slope_err_limit = 1.0e-10
    # relax the slope limit for short blocks (fewer points, noisier fit)
    if ipos_block.size < N_GPS_TAGS_WANTED // 2:
        slope_err_limit *= (200. / ipos_block.size)

    offset_err_limit = 5.0e-3

    if slope_err > slope_err_limit:
        # fixed: format string was missing the closing parenthesis
        raise ControlPointError(
            'Slope error too large: %g (limit: %g)' % (
                slope_err, slope_err_limit))

    if offset_err > offset_err_limit:
        # fixed: format string was missing the closing parenthesis
        raise ControlPointError(
            'Offset error too large: %g (limit: %g)' % (
                offset_err, offset_err_limit))

    ic = ipos_block[ipos_block.size//2]
    tc = offset + slope * (ic - ipos0)

    # undo the time reduction to obtain an absolute time
    return ic, tc + ic * deltat + tref
def analyse_gps_tags(header, gps_tags, offset, nsamples):
    '''
    Turn raw GPS tags into time control points for clock-drift correction.

    :param header: DataCube header dict (values are strings, e.g. 'S_RATE')
    :param gps_tags: tuple of arrays ``(ipos, t, fix, nsvs)``
    :param offset: sample index offset of this file within the sample stream
    :param nsamples: total number of samples in the stream

    :returns: tuple ``(tmin, tmax, icontrol, tcontrol, ok)`` — stream time
        span, control point sample indices and times, and the boolean mask
        marking which of the original GPS tags were considered reliable.

    If no usable control points can be constructed, falls back to the
    header's date/time fields and returns a two-point control array.
    '''
    ipos, t, fix, nsvs = gps_tags
    deltat = 1.0 / int(header['S_RATE'])

    # expected tag times assuming a perfect (drift-free) quartz clock
    tquartz = offset + ipos * deltat

    toff = t - tquartz
    toff_median = num.median(toff)

    n = t.size

    # apparent local clock rate between consecutive tags (1.0 == nominal)
    dtdt = (t[1:n] - t[0:n-1]) / (tquartz[1:n] - tquartz[0:n-1])

    # reject tags with gross offsets (> 10 s from the median) ...
    ok = abs(toff_median - toff) < 10.

    # ... and tags adjacent to an implausible clock-rate jump
    xok = num.abs(dtdt - 1.0) < 0.00001

    if ok.size >= 1:
        # first and last tag have only one rate estimate; always drop them
        ok[0] = False
        ok[1:n] &= xok
        ok[0:n-1] &= xok
        ok[n-1] = False

    ipos = ipos[ok]
    t = t[ok]
    fix = fix[ok]
    nsvs = nsvs[ok]

    blocksize = N_GPS_TAGS_WANTED // 2
    if ipos.size < blocksize:
        blocksize = max(10, ipos.size // 4)
        logger.warning(
            'Small number of GPS tags found. '
            'Reducing analysis block size to %i tags. '
            'Time correction may be unreliable.' % blocksize)

    try:
        if ipos.size < blocksize:
            raise ControlPointError(
                'Cannot determine GPS time correction: '
                'Too few GPS tags found: %i' % ipos.size)

        j = 0
        control_points = []
        tref = num.median(t - ipos*deltat)
        # fit one control point per block of tags; failed blocks are skipped
        while j < ipos.size - blocksize:
            ipos_block = ipos[j:j+blocksize]
            t_block = t[j:j+blocksize]
            try:
                ic, tc = make_control_point(ipos_block, t_block, tref, deltat)
                control_points.append((ic, tc))
            except ControlPointError as e:
                logger.debug(str(e))

            j += blocksize

        # also try to pin down the end of the tag sequence
        ipos_last = ipos[-blocksize:]
        t_last = t[-blocksize:]
        try:
            ic, tc = make_control_point(ipos_last, t_last, tref, deltat)
            control_points.append((ic, tc))
        except ControlPointError as e:
            logger.debug(str(e))

        if len(control_points) < 2:
            raise ControlPointError(
                'Could not safely determine time corrections from GPS: '
                'unable to construct two or more control points')

        i0, t0 = control_points[0]
        i1, t1 = control_points[1]
        i2, t2 = control_points[-2]
        i3, t3 = control_points[-1]

        if len(control_points) == 2:
            # too few points for a drift estimate; assume nominal deltat
            tmin = t0 - i0 * deltat - offset * deltat
            tmax = t3 + (nsamples - i3 - 1) * deltat
        else:
            icontrol = num.array(
                [x[0] for x in control_points], dtype=num.int64)
            tcontrol = num.array(
                [x[1] for x in control_points], dtype=float)
            # robust against steps:
            slope = num.median(
                (tcontrol[1:] - tcontrol[:-1])
                / (icontrol[1:] - icontrol[:-1]))

            # extrapolate with the median drift rate to the stream edges
            tmin = t0 + (offset - i0) * slope
            tmax = t2 + (offset + nsamples - 1 - i2) * slope

        # make sure the control points cover the full sample range
        if offset < i0:
            control_points[0:0] = [(offset, tmin)]

        if offset + nsamples - 1 > i3:
            control_points.append((offset + nsamples - 1, tmax))

        icontrol = num.array([x[0] for x in control_points], dtype=num.int64)
        tcontrol = num.array([x[1] for x in control_points], dtype=float)

        # corrected 2021-10-26: This sub-sample time shift introduced by the
        # Cube's ADC was previously not recognized.
        if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
            tcontrol -= 0.199 * deltat + 0.0003

        return tmin, tmax, icontrol, tcontrol, ok

    except ControlPointError as e:
        logger.error(str(e))

        # fall back to the header's start date/time
        tmin = util.str_to_time(header['S_DATE'] + header['S_TIME'],
                                format='%y/%m/%d%H:%M:%S')

        idat = int(header['DAT_NO'])
        if idat == 0:
            tmin = tmin + util.gps_utc_offset(tmin)
        else:
            # follow-up file of a session: start of the idat-th day
            tmin = util.day_start(tmin + idat * 24.*3600.) \
                + util.gps_utc_offset(tmin)

        tmax = tmin + (nsamples - 1) * deltat
        icontrol = num.array([offset, offset+nsamples - 1], dtype=num.int64)
        tcontrol = num.array([tmin, tmax])
        return tmin, tmax, icontrol, tcontrol, ok
def plot_gnss_location_timeline(fns):
    '''
    Plot north/east offsets, elevation and satellite count over time.

    One figure with four stacked, time-shared axes is created. Each file's
    time span gets an alternating background color; samples with a GPS fix
    are drawn in gray, samples without in red.

    :param fns: list of DataCube file paths
    '''
    from matplotlib import pyplot as plt
    from pyrocko.orthodrome import latlon_to_ne_numpy
    not_ = num.logical_not
    h = 3600.

    fig = plt.figure()

    axes = []
    for i in range(4):
        axes.append(
            fig.add_subplot(4, 1, i+1, sharex=axes[-1] if axes else None))

    background_colors = [
        color('aluminium1'),
        color('aluminium2')]

    tref = None
    for ifn, fn in enumerate(fns):
        header, gps_tags, nsamples = get_time_infos(fn)
        _, t, fix, nsvs, lats, lons, elevations, _ = gps_tags

        fix = fix.astype(bool)

        if t.size < 2:
            logger.warning('Need at least 2 gps tags for plotting: %s' % fn)
            # fixed: previously only warned and fell through, crashing on
            # t[0] below when the file contained no GPS tags at all
            continue

        if tref is None:
            tref = util.day_start(t[0])

        lat, lon, elevation = coordinates_from_gps(gps_tags)

        norths, easts = latlon_to_ne_numpy(lat, lon, lats, lons)

        for ax, data in zip(axes, (norths, easts, elevations, nsvs)):
            tspan = t[num.array([0, -1])]

            ax.axvspan(*((tspan - tref) / h), color=background_colors[ifn % 2])
            med = num.median(data)
            # dashed line marking the per-file median value
            ax.plot(
                (tspan - tref) / h,
                [med, med],
                ls='--',
                c='k',
                lw=3,
                alpha=0.5)

            ax.plot(
                (t[not_(fix)] - tref) / h, data[not_(fix)], 'o',
                ms=1.5,
                mew=0,
                color=color('scarletred2'))

            ax.plot(
                (t[fix] - tref) / h, data[fix], 'o',
                ms=1.5,
                mew=0,
                color=color('aluminium6'))

    for ax in axes:
        ax.grid(alpha=.3)

    ax_lat, ax_lon, ax_elev, ax_nsv = axes

    ax_lat.set_ylabel('Northing [m]')
    ax_lon.set_ylabel('Easting [m]')
    ax_elev.set_ylabel('Elevation [m]')
    ax_nsv.set_ylabel('Number of Satellites')

    ax_lat.get_xaxis().set_tick_params(labelbottom=False)
    ax_lon.get_xaxis().set_tick_params(labelbottom=False)
    ax_nsv.set_xlabel(
        'Hours after %s' % util.time_to_str(tref, format='%Y-%m-%d'))

    # NOTE: the title shows the coordinates of the last plotted file
    fig.suptitle(
        u'Lat: %.5f\u00b0 Lon: %.5f\u00b0 Elevation: %g m' % (
            lat, lon, elevation))

    plt.show()
def coordinates_from_gps(gps_tags):
    '''
    Median latitude, longitude and elevation from a GPS tag 8-tuple.
    '''
    _, _, _, _, lats, lons, elevations, _ = gps_tags
    return num.median(lats), num.median(lons), num.median(elevations)
def extract_stations(fns):
    '''
    Write a YAML station list for the given DataCube files to stdout.

    The station name is taken from each file's extension (DataCube file
    naming convention); files whose name was already seen are skipped with
    a warning.
    '''
    import io
    import sys
    from pyrocko.model import Station
    from pyrocko.guts import dump_all

    by_name = {}

    for path in fns:
        name = os.path.splitext(path)[1].lstrip('.')
        if name in by_name:
            logger.warning('Cube %s already in list!', name)
            continue

        _, gps_tags, _ = get_time_infos(path)
        lat, lon, elevation = coordinates_from_gps(gps_tags)

        by_name[name] = Station(
            network='',
            station=name,
            name=name,
            location='',
            lat=lat,
            lon=lon,
            elevation=elevation)

    buf = io.BytesIO()
    dump_all(by_name.values(), stream=buf)
    sys.stdout.write(buf.getvalue().decode())
def plot_timeline(fns):
    '''
    Plot GPS time offsets and fitted control points for DataCube files.

    :param fns: list of file paths, a single file path, or a directory.
        For a single file, the directory context is used to extend the
        timing information; for a directory, all contained files are used.
    '''
    from matplotlib import pyplot as plt

    fig = plt.figure()
    axes = fig.gca()

    h = 3600.

    if isinstance(fns, str):
        fn = fns
        if os.path.isdir(fn):
            fns = [
                os.path.join(fn, entry) for entry in sorted(os.listdir(fn))]

            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_timing_context(fns)

        else:
            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_extended_timing_context(fn)

    else:
        ipos, t, fix, nsvs, header, offset, nsamples = \
            get_timing_context(fns)

    deltat = 1.0 / int(header['S_RATE'])

    # reference time, snapped to the sample raster
    tref = num.median(t - ipos * deltat)
    tref = round(tref / deltat) * deltat

    # same sub-sample ADC shift as applied in analyse_gps_tags, so raw
    # tags and control points are plotted in the same frame
    if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
        tcorr = 0.199 * deltat + 0.0003
    else:
        tcorr = 0.0

    # x: uncorrected quartz time, y: time offset of each GPS tag
    x = ipos*deltat
    y = (t - tref) - ipos*deltat - tcorr

    bfix = fix != 0
    bnofix = fix == 0

    tmin, tmax, icontrol, tcontrol, ok = analyse_gps_tags(
        header, (ipos, t, fix, nsvs), offset, nsamples)

    la = num.logical_and
    nok = num.logical_not(ok)

    # tags with GPS fix: '+' (green if accepted, gray if rejected)
    axes.plot(
        x[la(bfix, ok)]/h, y[la(bfix, ok)], '+',
        ms=5, color=color('chameleon3'))
    axes.plot(
        x[la(bfix, nok)]/h, y[la(bfix, nok)], '+',
        ms=5, color=color('aluminium4'))

    # tags without GPS fix: 'x' (orange if accepted, gray if rejected)
    axes.plot(
        x[la(bnofix, ok)]/h, y[la(bnofix, ok)], 'x',
        ms=5, color=color('chocolate3'))
    axes.plot(
        x[la(bnofix, nok)]/h, y[la(bnofix, nok)], 'x',
        ms=5, color=color('aluminium4'))

    # fitted control points as connected dots
    tred = tcontrol - icontrol*deltat - tref
    axes.plot(icontrol*deltat/h, tred, color=color('aluminium6'))
    axes.plot(icontrol*deltat/h, tred, 'o', ms=5, color=color('aluminium6'))

    ymin = (math.floor(tred.min() / deltat)-1) * deltat
    ymax = (math.ceil(tred.max() / deltat)+1) * deltat
    # ymin = min(ymin, num.min(y))
    # ymax = max(ymax, num.max(y))

    # one horizontal grid line per sample interval, if the span is small
    if ymax - ymin < 1000 * deltat:
        ygrid = math.floor(tred.min() / deltat) * deltat
        while ygrid < ymax:
            axes.axhline(ygrid, color=color('aluminium4'), alpha=0.3)
            ygrid += deltat

    xmin = icontrol[0]*deltat/h
    xmax = icontrol[-1]*deltat/h
    xsize = xmax - xmin
    xmin -= xsize * 0.1
    xmax += xsize * 0.1
    axes.set_xlim(xmin, xmax)

    axes.set_ylim(ymin, ymax)

    axes.set_xlabel('Uncorrected (quartz) time [h]')
    axes.set_ylabel('Relative time correction [s]')

    plt.show()
# Cache of DirContext objects, keyed by absolute directory path (see
# the context() function below).
g_dir_contexts = {}
class DirContextEntry(Object):
    '''Cached metadata about one DataCube file within a directory.'''

    path = String.T()  # absolute path of the DataCube file
    tstart = Timestamp.T()  # recording start time from the file header
    ifile = Int.T()  # file number within the recording session ('DAT_NO')
class DirContext(Object):
    '''Cached scan of a directory, used to find files of the same session.'''

    path = String.T()  # absolute path of the scanned directory
    mtime = Timestamp.T()  # newest modification time seen during the scan
    # NOTE(review): ``entries`` is assigned and iterated as a *list* of
    # DirContextEntry objects, but is declared as a single-object guts
    # property here; a list-typed declaration (List.T(DirContextEntry.T()))
    # would be expected — verify against pyrocko.guts conventions.
    entries = DirContextEntry.T()

    def get_entry(self, fn):
        '''
        Return the cached entry whose path matches *fn*.

        :raises Exception: if *fn* is not among the cached entries.
        '''
        path = os.path.abspath(fn)
        for entry in self.entries:
            if entry.path == path:
                return entry

        raise Exception('entry not found')

    def iter_entries(self, fn, step=1):
        '''
        Iterate entries adjacent to the one of *fn*, in file-number order.

        Starts one step after (``step=1``) or before (``step=-1``) *fn*'s
        entry and continues while consecutive file numbers exist among
        entries sharing the same start time (i.e. the same session).
        '''
        current = self.get_entry(fn)
        by_ifile = dict(
            (entry.ifile, entry) for entry in self.entries
            if entry.tstart == current.tstart)

        icurrent = current.ifile
        while True:
            icurrent += step
            try:
                yield by_ifile[icurrent]

            except KeyError:
                break
def context(fn):
    '''
    Get (cached) directory context for the DataCube file *fn*.

    Scans the directory containing *fn* for DataCube files and reads their
    headers. The result is cached in ``g_dir_contexts`` and reused as long
    as no modification time within the directory changes.

    :raises DataCubeError: if a detected DataCube file fails to load.
    '''
    from pyrocko import datacube_ext

    dpath = os.path.dirname(os.path.abspath(fn))
    mtimes = [os.stat(dpath)[8]]

    # dentries hold absolute paths (dpath is absolute)
    dentries = sorted([os.path.join(dpath, f) for f in os.listdir(dpath)
                       if os.path.isfile(os.path.join(dpath, f))])
    for dentry in dentries:
        mtimes.append(os.stat(dentry)[8])

    mtime = float(max(mtimes))

    if dpath in g_dir_contexts:
        dir_context = g_dir_contexts[dpath]
        if dir_context.mtime == mtime:
            return dir_context

        # directory changed, invalidate the cached scan
        del g_dir_contexts[dpath]

    entries = []
    for fn2 in dentries:
        if not os.path.isfile(fn2):
            continue

        # cheap sniff before attempting a full header load
        with open(fn2, 'rb') as f:
            first512 = f.read(512)
            if not detect(first512):
                continue

        with open(fn2, 'rb') as f:
            try:
                header, data_arrays, gps_tags, nsamples, _ = \
                    datacube_ext.load(f.fileno(), 3, 0, -1, None)

            except datacube_ext.DataCubeError as e:
                e = DataCubeError(str(e))
                # fixed: report the file that actually failed (was: fn)
                e.set_context('filename', fn2)
                raise e

        header = dict(header)
        entries.append(DirContextEntry(
            path=os.path.abspath(fn2),
            tstart=util.str_to_time(
                '20' + header['S_DATE'] + ' ' + header['S_TIME'],
                format='%Y/%m/%d %H:%M:%S'),
            ifile=int(header['DAT_NO'])))

    dir_context = DirContext(mtime=mtime, path=dpath, entries=entries)

    # fixed: store the result so the cache lookup above can ever hit
    # (previously g_dir_contexts was read but never written)
    g_dir_contexts[dpath] = dir_context

    return dir_context
def get_time_infos(fn):
    '''
    Read header dict, GPS tags and sample count from DataCube file *fn*.

    :raises DataCubeError: if the low-level reader fails.
    '''
    from pyrocko import datacube_ext

    with open(fn, 'rb') as f:
        try:
            # loadflag 1: header and GPS info only, no sample data
            # (matches iload's load_data=False path)
            header, _, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), 1, 0, -1, None)

        except datacube_ext.DataCubeError as e:
            e = DataCubeError(str(e))
            e.set_context('filename', fn)
            raise e

    return dict(header), gps_tags, nsamples
def get_timing_context(fns):
    '''
    Concatenate GPS timing information of several DataCube files.

    Sample indices are shifted in place so that they refer to one
    continuous stream of samples. Returns ``(ipos, t, fix, nsvs, header,
    offset, nsamples)``, where *header* is that of the last file read and
    *offset* is always 0.
    '''
    collected = [[], [], [], []]
    nsamples_total = 0

    for fn in fns:
        header, gps_tags, nsamples = get_time_infos(fn)

        # shift this file's sample indices into the stream-wide frame
        ipos_arr = gps_tags[0]
        ipos_arr += nsamples_total

        for bucket, values in zip(collected, gps_tags):
            bucket.append(values)

        nsamples_total += nsamples

    ipos, t, fix, nsvs = (num.concatenate(parts) for parts in collected)

    return ipos, t, fix, nsvs, header, 0, nsamples_total
def get_extended_timing_context(fn):
    '''
    Get timing information of DataCube file *fn*, plus directory neighbors.

    If *fn* itself contains no GPS fixes, GPS tags from the following and
    preceding files of the same recording session are aggregated (with
    sample indices shifted into *fn*'s frame) until a file with fixes is
    found in each direction.
    '''
    c = context(fn)

    header, gps_tags, nsamples_base = get_time_infos(fn)

    ioff = 0
    aggregated = [gps_tags]

    nsamples_total = nsamples_base

    if num.sum(gps_tags[2]) == 0:
        # no GPS fixes in this file — borrow tags from neighboring files

        # forward: shift neighbor sample indices by the samples before them
        ioff = nsamples_base
        for entry in c.iter_entries(fn, 1):
            _, gps_tags, nsamples = get_time_infos(entry.path)

            ipos = gps_tags[0]
            ipos += ioff  # in-place shift into fn's sample frame

            aggregated.append(gps_tags)
            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

            ioff += nsamples

        # backward: indices become negative relative to fn's first sample
        ioff = 0
        for entry in c.iter_entries(fn, -1):
            _, gps_tags, nsamples = get_time_infos(entry.path)

            ioff -= nsamples

            ipos = gps_tags[0]
            ipos += ioff  # in-place shift into fn's sample frame

            aggregated[0:0] = [gps_tags]

            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

    ipos, t, fix, nsvs = [num.concatenate(x) for x in zip(*aggregated)][:4]

    # NOTE(review): offset 0 and the base file's sample count are returned
    # deliberately (borrowed tags may have indices outside [0, nsamples));
    # the commented-out variant below would report the aggregated totals.
# return ipos, t, fix, nsvs, header, ioff, nsamples_total
    return ipos, t, fix, nsvs, header, 0, nsamples_base
def iload(fn, load_data=True, interpolation='sinc'):
    '''
    Load a DataCube file, yielding one :py:class:`pyrocko.trace.Trace` per
    channel (two per channel when a GPS leap second falls inside the span).

    GPS-derived control points are used to correct clock drift; with
    ``interpolation='sinc'`` the samples are resampled onto the corrected
    time axis, with ``'off'`` only the trace onset time is adjusted.

    :param fn: path to the DataCube file
    :param load_data: if ``False``, only timing/headers are read and the
        yielded traces carry no sample data
    :param interpolation: ``'sinc'`` or ``'off'``

    :raises NotImplementedError: for an unknown interpolation method
    :raises DataCubeError: if reading or resampling fails
    '''
    from pyrocko import datacube_ext
    from pyrocko import signal_ext

    if interpolation not in ('sinc', 'off'):
        raise NotImplementedError(
            'no such interpolation method: %s' % interpolation)

    with open(fn, 'rb') as f:
        # loadflag 2: header + samples, 1: header/GPS info only
        if load_data:
            loadflag = 2
        else:
            loadflag = 1

        try:
            header, data_arrays, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), loadflag, 0, -1, None)

        except datacube_ext.DataCubeError as e:
            e = DataCubeError(str(e))
            e.set_context('filename', fn)
            raise e

    header = dict(header)
    deltat = 1.0 / int(header['S_RATE'])
    nchannels = int(header['CH_NUM'])

    # timing may be borrowed from neighboring files of the same session
    ipos, t, fix, nsvs, header_, offset_, nsamples_ = \
        get_extended_timing_context(fn)

    tmin, tmax, icontrol, tcontrol, _ = analyse_gps_tags(
        header_, (ipos, t, fix, nsvs), offset_, nsamples_)

    if icontrol is None:
        logger.warning(
            'No usable GPS timestamps found. Using datacube header '
            'information to guess time. (file: "%s")' % fn)

    # snap the output time span to the sample raster
    tmin_ip = round(tmin / deltat) * deltat
    if interpolation != 'off':
        tmax_ip = round(tmax / deltat) * deltat
    else:
        tmax_ip = tmin_ip + (nsamples-1) * deltat

    nsamples_ip = int(round((tmax_ip - tmin_ip)/deltat)) + 1
    # to prevent problems with rounding errors:
    tmax_ip = tmin_ip + (nsamples_ip-1) * deltat

    # leap second instants, expressed on the GPS time scale
    leaps = num.array(
        [x[0] + util.gps_utc_offset(x[0]) for x in util.read_leap_seconds2()],
        dtype=float)

    if load_data and icontrol is not None:
        # count control points that fall inside this file's own samples
        ncontrol_this = num.sum(
            num.logical_and(0 <= icontrol, icontrol < nsamples))

        if ncontrol_this <= 1:
            logger.warning(
                'Extrapolating GPS time information from directory context '
                '(insufficient number of GPS timestamps in file: "%s").' % fn)

    for i in range(nchannels):
        if load_data:
            arr = data_arrays[i]
            assert arr.size == nsamples

            if interpolation == 'sinc' and icontrol is not None:
                # resample onto the drift-corrected time axis
                ydata = num.empty(nsamples_ip, dtype=float)
                try:
                    signal_ext.antidrift(
                        icontrol, tcontrol,
                        arr.astype(float), tmin_ip, deltat, ydata)

                except signal_ext.Error as e:
                    e = DataCubeError(str(e))
                    e.set_context('filename', fn)
                    e.set_context('n_control_points', icontrol.size)
                    e.set_context('n_samples_raw', arr.size)
                    e.set_context('n_samples_ip', ydata.size)
                    e.set_context('tmin_ip', util.time_to_str(tmin_ip))
                    raise e

                # convert back to the original integer sample type
                ydata = num.round(ydata).astype(arr.dtype)
            else:
                ydata = arr

            tr_tmin = tmin_ip
            tr_tmax = None  # let Trace derive tmax from the sample count
        else:
            ydata = None
            tr_tmin = tmin_ip
            tr_tmax = tmax_ip

        tr = trace.Trace('', header['DEV_NO'], '', 'p%i' % i, deltat=deltat,
                         ydata=ydata, tmin=tr_tmin, tmax=tr_tmax, meta=header)

        # split and shift traces at GPS leap seconds, convert GPS -> UTC
        bleaps = num.logical_and(tmin_ip <= leaps, leaps < tmax_ip)

        if num.any(bleaps):
            assert num.sum(bleaps) == 1
            tcut = leaps[bleaps][0]

            for tmin_cut, tmax_cut in [
                    (tr.tmin, tcut), (tcut, tr.tmax+tr.deltat)]:
                try:
                    tr_cut = tr.chop(tmin_cut, tmax_cut, inplace=False)
                    tr_cut.shift(
                        util.utc_gps_offset(0.5*(tr_cut.tmin+tr_cut.tmax)))
                    yield tr_cut

                except trace.NoData:
                    pass

        else:
            tr.shift(util.utc_gps_offset(0.5*(tr.tmin+tr.tmax)))
            yield tr
# Known DataCube header keys, grouped by the Python type of their values.
header_keys = {
    str: b'GIPP_V DEV_NO E_NAME GPS_PO S_TIME S_DATE DAT_NO'.split(),
    int: b'''P_AMPL CH_NUM S_RATE D_FILT C_MODE A_CHOP F_TIME GPS_TI GPS_OF
             A_FILT A_PHAS GPS_ON ACQ_ON V_TCXO D_VOLT E_VOLT'''.split()}

all_header_keys = header_keys[str] + header_keys[int]


def detect(first512):
    '''
    Check whether *first512* (first 512 bytes) looks like DataCube data.
    '''
    data = first512
    if len(data) < 512:
        return False

    # header blocks carry 0xf in the high nibble of the first byte
    if ord(data[0:1]) >> 4 != 15:
        return False

    end = data.find(b'\x80')
    if end == -1:
        end = len(data)

    # strip filler bytes, then normalize separators to spaces
    payload = data[1:end].replace(b'\xf0', b'')
    for separator in (b';', b'='):
        payload = payload.replace(separator, b' ')

    tokens = payload.split(b' ')

    # accept if at least one known header key appears
    if not any(key in tokens for key in all_header_keys):
        return False
    return True
if __name__ == '__main__':
    # Command line interface: plot drift timing ('timeline'), plot GNSS
    # locations ('gnss') or dump a station list ('stations') for the given
    # DataCube files.
    import argparse
    parser = argparse.ArgumentParser(description='Datacube reader')

    parser.add_argument(
        'action', choices=['timeline', 'gnss', 'stations'],
        help='Action')
    parser.add_argument(
        'files', nargs='+')

    parser.add_argument(
        '--loglevel', '-l',
        choices=['critical', 'error', 'warning', 'info', 'debug'],
        default='info',
        help='Set logger level. Default: %(default)s')

    args = parser.parse_args()

    util.setup_logging('pyrocko.io.datacube', args.loglevel)
    # matplotlib's font_manager is noisy at debug level; silence it
    logging.getLogger('matplotlib.font_manager').disabled = True

    if args.action == 'timeline':
        plot_timeline(args.files)

    elif args.action == 'gnss':
        plot_gnss_location_timeline(args.files)

    elif args.action == 'stations':
        extract_stations(args.files)