1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import os
7import math
8import logging
10import numpy as num
12from pyrocko import trace, util, plot
13from pyrocko.guts import Object, Int, String, Timestamp
15from . import io_common
17logger = logging.getLogger(__name__)
19N_GPS_TAGS_WANTED = 200 # must match definition in datacube_ext.c
21APPLY_SUBSAMPLE_SHIFT_CORRECTION = True
24def color(c):
25 c = plot.color(c)
26 return tuple(x/255. for x in c)
29class DataCubeError(io_common.FileLoadError):
30 pass
33class ControlPointError(Exception):
34 pass
37def make_control_point(ipos_block, t_block, tref, deltat):
39 # reduce time (no drift would mean straight line)
40 tred = (t_block - tref) - ipos_block*deltat
42 # first round, remove outliers
43 q25, q75 = num.percentile(tred, (25., 75.))
44 iok = num.logical_and(q25 <= tred, tred <= q75)
46 # detrend
47 slope, offset = num.polyfit(ipos_block[iok], tred[iok], 1)
48 tred2 = tred - (offset + slope * ipos_block)
50 # second round, remove outliers based on detrended tred, refit
51 q25, q75 = num.percentile(tred2, (25., 75.))
52 iok = num.logical_and(q25 <= tred2, tred2 <= q75)
53 x = ipos_block[iok].copy()
54 ipos0 = x[0]
55 x -= ipos0
56 y = tred[iok].copy()
57 if x.size < 2:
58 raise ControlPointError('Insufficient number control points after QC.')
60 elif x.size < 5: # needed for older numpy versions
61 (slope, offset) = num.polyfit(x, y, 1)
62 else:
63 (slope, offset), cov = num.polyfit(x, y, 1, cov=True)
65 slope_err, offset_err = num.sqrt(num.diag(cov))
67 slope_err_limit = 1.0e-10
68 if ipos_block.size < N_GPS_TAGS_WANTED // 2:
69 slope_err_limit *= (200. / ipos_block.size)
71 offset_err_limit = 5.0e-3
73 if slope_err > slope_err_limit:
74 raise ControlPointError(
75 'Slope error too large: %g (limit: %g' % (
76 slope_err, slope_err_limit))
78 if offset_err > offset_err_limit:
79 raise ControlPointError(
80 'Offset error too large: %g (limit: %g' % (
81 offset_err, offset_err_limit))
83 ic = ipos_block[ipos_block.size//2]
84 tc = offset + slope * (ic - ipos0)
86 return ic, tc + ic * deltat + tref
89def analyse_gps_tags(header, gps_tags, offset, nsamples):
91 ipos, t, fix, nsvs = gps_tags
92 deltat = 1.0 / int(header['S_RATE'])
94 tquartz = offset + ipos * deltat
96 toff = t - tquartz
97 toff_median = num.median(toff)
99 n = t.size
101 dtdt = (t[1:n] - t[0:n-1]) / (tquartz[1:n] - tquartz[0:n-1])
103 ok = abs(toff_median - toff) < 10.
105 xok = num.abs(dtdt - 1.0) < 0.00001
107 if ok.size >= 1:
109 ok[0] = False
110 ok[1:n] &= xok
111 ok[0:n-1] &= xok
112 ok[n-1] = False
114 ipos = ipos[ok]
115 t = t[ok]
116 fix = fix[ok]
117 nsvs = nsvs[ok]
119 blocksize = N_GPS_TAGS_WANTED // 2
120 if ipos.size < blocksize:
121 blocksize = max(10, ipos.size // 4)
122 logger.warning(
123 'Small number of GPS tags found. '
124 'Reducing analysis block size to %i tags. '
125 'Time correction may be unreliable.' % blocksize)
127 try:
128 if ipos.size < blocksize:
129 raise ControlPointError(
130 'Cannot determine GPS time correction: '
131 'Too few GPS tags found: %i' % ipos.size)
133 j = 0
134 control_points = []
135 tref = num.median(t - ipos*deltat)
136 while j < ipos.size - blocksize:
137 ipos_block = ipos[j:j+blocksize]
138 t_block = t[j:j+blocksize]
139 try:
140 ic, tc = make_control_point(ipos_block, t_block, tref, deltat)
141 control_points.append((ic, tc))
142 except ControlPointError as e:
143 logger.debug(str(e))
145 j += blocksize
147 ipos_last = ipos[-blocksize:]
148 t_last = t[-blocksize:]
149 try:
150 ic, tc = make_control_point(ipos_last, t_last, tref, deltat)
151 control_points.append((ic, tc))
152 except ControlPointError as e:
153 logger.debug(str(e))
155 if len(control_points) < 2:
156 raise ControlPointError(
157 'Could not safely determine time corrections from GPS: '
158 'unable to construct two or more control points')
160 i0, t0 = control_points[0]
161 i1, t1 = control_points[1]
162 i2, t2 = control_points[-2]
163 i3, t3 = control_points[-1]
164 if len(control_points) == 2:
165 tmin = t0 - i0 * deltat - offset * deltat
166 tmax = t3 + (nsamples - i3 - 1) * deltat
167 else:
168 icontrol = num.array(
169 [x[0] for x in control_points], dtype=num.int64)
170 tcontrol = num.array(
171 [x[1] for x in control_points], dtype=float)
172 # robust against steps:
173 slope = num.median(
174 (tcontrol[1:] - tcontrol[:-1])
175 / (icontrol[1:] - icontrol[:-1]))
177 tmin = t0 + (offset - i0) * slope
178 tmax = t2 + (offset + nsamples - 1 - i2) * slope
180 if offset < i0:
181 control_points[0:0] = [(offset, tmin)]
183 if offset + nsamples - 1 > i3:
184 control_points.append((offset + nsamples - 1, tmax))
186 icontrol = num.array([x[0] for x in control_points], dtype=num.int64)
187 tcontrol = num.array([x[1] for x in control_points], dtype=float)
189 # corrected 2021-10-26: This sub-sample time shift introduced by the
190 # Cube's ADC was previously not recognized.
191 if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
192 tcontrol -= 0.199 * deltat + 0.0003
194 return tmin, tmax, icontrol, tcontrol, ok
196 except ControlPointError as e:
197 logger.error(str(e))
199 tmin = util.str_to_time(header['S_DATE'] + header['S_TIME'],
200 format='%y/%m/%d%H:%M:%S')
202 idat = int(header['DAT_NO'])
203 if idat == 0:
204 tmin = tmin + util.gps_utc_offset(tmin)
205 else:
206 tmin = util.day_start(tmin + idat * 24.*3600.) \
207 + util.gps_utc_offset(tmin)
209 tmax = tmin + (nsamples - 1) * deltat
210 icontrol = num.array([offset, offset+nsamples - 1], dtype=num.int64)
211 tcontrol = num.array([tmin, tmax])
212 return tmin, tmax, icontrol, tcontrol, ok
215def plot_gnss_location_timeline(fns):
216 from matplotlib import pyplot as plt
217 from pyrocko.orthodrome import latlon_to_ne_numpy
218 not_ = num.logical_not
219 h = 3600.
221 fig = plt.figure()
223 axes = []
224 for i in range(4):
225 axes.append(
226 fig.add_subplot(4, 1, i+1, sharex=axes[-1] if axes else None))
228 background_colors = [
229 color('aluminium1'),
230 color('aluminium2')]
232 tref = None
233 for ifn, fn in enumerate(fns):
234 header, gps_tags, nsamples = get_time_infos(fn)
235 _, t, fix, nsvs, lats, lons, elevations, _ = gps_tags
237 fix = fix.astype(bool)
239 if t.size < 2:
240 logger.warning('Need at least 2 gps tags for plotting: %s' % fn)
242 if tref is None:
243 tref = util.day_start(t[0])
244 lat, lon, elevation = coordinates_from_gps(gps_tags)
246 norths, easts = latlon_to_ne_numpy(lat, lon, lats, lons)
248 for ax, data in zip(axes, (norths, easts, elevations, nsvs)):
250 tspan = t[num.array([0, -1])]
252 ax.axvspan(*((tspan - tref) / h), color=background_colors[ifn % 2])
253 med = num.median(data)
254 ax.plot(
255 (tspan - tref) / h,
256 [med, med],
257 ls='--',
258 c='k',
259 lw=3,
260 alpha=0.5)
262 ax.plot(
263 (t[not_(fix)] - tref) / h, data[not_(fix)], 'o',
264 ms=1.5,
265 mew=0,
266 color=color('scarletred2'))
268 ax.plot(
269 (t[fix] - tref) / h, data[fix], 'o',
270 ms=1.5,
271 mew=0,
272 color=color('aluminium6'))
274 for ax in axes:
275 ax.grid(alpha=.3)
277 ax_lat, ax_lon, ax_elev, ax_nsv = axes
279 ax_lat.set_ylabel('Northing [m]')
280 ax_lon.set_ylabel('Easting [m]')
281 ax_elev.set_ylabel('Elevation [m]')
282 ax_nsv.set_ylabel('Number of Satellites')
284 ax_lat.get_xaxis().set_tick_params(labelbottom=False)
285 ax_lon.get_xaxis().set_tick_params(labelbottom=False)
286 ax_nsv.set_xlabel(
287 'Hours after %s' % util.time_to_str(tref, format='%Y-%m-%d'))
289 fig.suptitle(
290 u'Lat: %.5f\u00b0 Lon: %.5f\u00b0 Elevation: %g m' % (
291 lat, lon, elevation))
293 plt.show()
296def coordinates_from_gps(gps_tags):
297 ipos, t, fix, nsvs, lats, lons, elevations, temps = gps_tags
298 return tuple(num.median(x) for x in (lats, lons, elevations))
301def extract_stations(fns):
302 import io
303 import sys
304 from pyrocko.model import Station
305 from pyrocko.guts import dump_all
307 stations = {}
309 for fn in fns:
310 sta_name = os.path.splitext(fn)[1].lstrip('.')
311 if sta_name in stations:
312 logger.warning('Cube %s already in list!', sta_name)
313 continue
315 header, gps_tags, nsamples = get_time_infos(fn)
317 lat, lon, elevation = coordinates_from_gps(gps_tags)
319 sta = Station(
320 network='',
321 station=sta_name,
322 name=sta_name,
323 location='',
324 lat=lat,
325 lon=lon,
326 elevation=elevation)
328 stations[sta_name] = sta
330 f = io.BytesIO()
331 dump_all(stations.values(), stream=f)
332 sys.stdout.write(f.getvalue().decode())
335def plot_timeline(fns):
336 from matplotlib import pyplot as plt
338 fig = plt.figure()
339 axes = fig.gca()
341 h = 3600.
343 if isinstance(fns, str):
344 fn = fns
345 if os.path.isdir(fn):
346 fns = [
347 os.path.join(fn, entry) for entry in sorted(os.listdir(fn))]
349 ipos, t, fix, nsvs, header, offset, nsamples = \
350 get_timing_context(fns)
352 else:
353 ipos, t, fix, nsvs, header, offset, nsamples = \
354 get_extended_timing_context(fn)
356 else:
357 ipos, t, fix, nsvs, header, offset, nsamples = \
358 get_timing_context(fns)
360 deltat = 1.0 / int(header['S_RATE'])
362 tref = num.median(t - ipos * deltat)
363 tref = round(tref / deltat) * deltat
365 if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
366 tcorr = 0.199 * deltat + 0.0003
367 else:
368 tcorr = 0.0
370 x = ipos*deltat
371 y = (t - tref) - ipos*deltat - tcorr
373 bfix = fix != 0
374 bnofix = fix == 0
376 tmin, tmax, icontrol, tcontrol, ok = analyse_gps_tags(
377 header, (ipos, t, fix, nsvs), offset, nsamples)
379 la = num.logical_and
380 nok = num.logical_not(ok)
382 axes.plot(
383 x[la(bfix, ok)]/h, y[la(bfix, ok)], '+',
384 ms=5, color=color('chameleon3'))
385 axes.plot(
386 x[la(bfix, nok)]/h, y[la(bfix, nok)], '+',
387 ms=5, color=color('aluminium4'))
389 axes.plot(
390 x[la(bnofix, ok)]/h, y[la(bnofix, ok)], 'x',
391 ms=5, color=color('chocolate3'))
392 axes.plot(
393 x[la(bnofix, nok)]/h, y[la(bnofix, nok)], 'x',
394 ms=5, color=color('aluminium4'))
396 tred = tcontrol - icontrol*deltat - tref
397 axes.plot(icontrol*deltat/h, tred, color=color('aluminium6'))
398 axes.plot(icontrol*deltat/h, tred, 'o', ms=5, color=color('aluminium6'))
400 ymin = (math.floor(tred.min() / deltat)-1) * deltat
401 ymax = (math.ceil(tred.max() / deltat)+1) * deltat
402 # ymin = min(ymin, num.min(y))
403 # ymax = max(ymax, num.max(y))
405 if ymax - ymin < 1000 * deltat:
406 ygrid = math.floor(tred.min() / deltat) * deltat
407 while ygrid < ymax:
408 axes.axhline(ygrid, color=color('aluminium4'), alpha=0.3)
409 ygrid += deltat
411 xmin = icontrol[0]*deltat/h
412 xmax = icontrol[-1]*deltat/h
413 xsize = xmax - xmin
414 xmin -= xsize * 0.1
415 xmax += xsize * 0.1
416 axes.set_xlim(xmin, xmax)
418 axes.set_ylim(ymin, ymax)
420 axes.set_xlabel('Uncorrected (quartz) time [h]')
421 axes.set_ylabel('Relative time correction [s]')
423 plt.show()
426g_dir_contexts = {}
429class DirContextEntry(Object):
430 path = String.T()
431 tstart = Timestamp.T()
432 ifile = Int.T()
435class DirContext(Object):
436 path = String.T()
437 mtime = Timestamp.T()
438 entries = DirContextEntry.T()
440 def get_entry(self, fn):
441 path = os.path.abspath(fn)
442 for entry in self.entries:
443 if entry.path == path:
444 return entry
446 raise Exception('entry not found')
448 def iter_entries(self, fn, step=1):
449 current = self.get_entry(fn)
450 by_ifile = dict(
451 (entry.ifile, entry) for entry in self.entries
452 if entry.tstart == current.tstart)
454 icurrent = current.ifile
455 while True:
456 icurrent += step
457 try:
458 yield by_ifile[icurrent]
460 except KeyError:
461 break
464def context(fn):
465 from pyrocko import datacube_ext
467 dpath = os.path.dirname(os.path.abspath(fn))
468 mtimes = [os.stat(dpath)[8]]
470 dentries = sorted([os.path.join(dpath, f) for f in os.listdir(dpath)
471 if os.path.isfile(os.path.join(dpath, f))])
472 for dentry in dentries:
473 fn2 = os.path.join(dpath, dentry)
474 mtimes.append(os.stat(fn2)[8])
476 mtime = float(max(mtimes))
478 if dpath in g_dir_contexts:
479 dir_context = g_dir_contexts[dpath]
480 if dir_context.mtime == mtime:
481 return dir_context
483 del g_dir_contexts[dpath]
485 entries = []
486 for dentry in dentries:
487 fn2 = os.path.join(dpath, dentry)
488 if not os.path.isfile(fn2):
489 continue
491 with open(fn2, 'rb') as f:
492 first512 = f.read(512)
493 if not detect(first512):
494 continue
496 with open(fn2, 'rb') as f:
497 try:
498 header, data_arrays, gps_tags, nsamples, _ = \
499 datacube_ext.load(f.fileno(), 3, 0, -1, None)
501 except datacube_ext.DataCubeError as e:
502 e = DataCubeError(str(e))
503 e.set_context('filename', fn)
504 raise e
506 header = dict(header)
507 entries.append(DirContextEntry(
508 path=os.path.abspath(fn2),
509 tstart=util.str_to_time(
510 '20' + header['S_DATE'] + ' ' + header['S_TIME'],
511 format='%Y/%m/%d %H:%M:%S'),
512 ifile=int(header['DAT_NO'])))
514 dir_context = DirContext(mtime=mtime, path=dpath, entries=entries)
516 return dir_context
519def get_time_infos(fn):
520 from pyrocko import datacube_ext
522 with open(fn, 'rb') as f:
523 try:
524 header, _, gps_tags, nsamples, _ = datacube_ext.load(
525 f.fileno(), 1, 0, -1, None)
527 except datacube_ext.DataCubeError as e:
528 e = DataCubeError(str(e))
529 e.set_context('filename', fn)
530 raise e
532 return dict(header), gps_tags, nsamples
535def get_timing_context(fns):
536 joined = [[], [], [], []]
537 ioff = 0
538 for fn in fns:
539 header, gps_tags, nsamples = get_time_infos(fn)
541 ipos = gps_tags[0]
542 ipos += ioff
544 for i in range(4):
545 joined[i].append(gps_tags[i])
547 ioff += nsamples
549 ipos, t, fix, nsvs = [num.concatenate(x) for x in joined]
551 nsamples = ioff
552 return ipos, t, fix, nsvs, header, 0, nsamples
555def get_extended_timing_context(fn):
556 c = context(fn)
558 header, gps_tags, nsamples_base = get_time_infos(fn)
560 ioff = 0
561 aggregated = [gps_tags]
563 nsamples_total = nsamples_base
565 if num.sum(gps_tags[2]) == 0:
567 ioff = nsamples_base
568 for entry in c.iter_entries(fn, 1):
570 _, gps_tags, nsamples = get_time_infos(entry.path)
572 ipos = gps_tags[0]
573 ipos += ioff
575 aggregated.append(gps_tags)
576 nsamples_total += nsamples
578 if num.sum(gps_tags[2]) > 0:
579 break
581 ioff += nsamples
583 ioff = 0
584 for entry in c.iter_entries(fn, -1):
586 _, gps_tags, nsamples = get_time_infos(entry.path)
588 ioff -= nsamples
590 ipos = gps_tags[0]
591 ipos += ioff
593 aggregated[0:0] = [gps_tags]
595 nsamples_total += nsamples
597 if num.sum(gps_tags[2]) > 0:
598 break
600 ipos, t, fix, nsvs = [num.concatenate(x) for x in zip(*aggregated)][:4]
602# return ipos, t, fix, nsvs, header, ioff, nsamples_total
603 return ipos, t, fix, nsvs, header, 0, nsamples_base
606def iload(fn, load_data=True, interpolation='sinc'):
607 from pyrocko import datacube_ext
608 from pyrocko import signal_ext
610 if interpolation not in ('sinc', 'off'):
611 raise NotImplementedError(
612 'no such interpolation method: %s' % interpolation)
614 with open(fn, 'rb') as f:
615 if load_data:
616 loadflag = 2
617 else:
618 loadflag = 1
620 try:
621 header, data_arrays, gps_tags, nsamples, _ = datacube_ext.load(
622 f.fileno(), loadflag, 0, -1, None)
624 except datacube_ext.DataCubeError as e:
625 e = DataCubeError(str(e))
626 e.set_context('filename', fn)
627 raise e
629 header = dict(header)
630 deltat = 1.0 / int(header['S_RATE'])
631 nchannels = int(header['CH_NUM'])
633 ipos, t, fix, nsvs, header_, offset_, nsamples_ = \
634 get_extended_timing_context(fn)
636 tmin, tmax, icontrol, tcontrol, _ = analyse_gps_tags(
637 header_, (ipos, t, fix, nsvs), offset_, nsamples_)
639 if icontrol is None:
640 logger.warning(
641 'No usable GPS timestamps found. Using datacube header '
642 'information to guess time. (file: "%s")' % fn)
644 tmin_ip = round(tmin / deltat) * deltat
645 if interpolation != 'off':
646 tmax_ip = round(tmax / deltat) * deltat
647 else:
648 tmax_ip = tmin_ip + (nsamples-1) * deltat
650 nsamples_ip = int(round((tmax_ip - tmin_ip)/deltat)) + 1
651 # to prevent problems with rounding errors:
652 tmax_ip = tmin_ip + (nsamples_ip-1) * deltat
654 leaps = num.array(
655 [x[0] + util.gps_utc_offset(x[0]) for x in util.read_leap_seconds2()],
656 dtype=float)
658 if load_data and icontrol is not None:
659 ncontrol_this = num.sum(
660 num.logical_and(0 <= icontrol, icontrol < nsamples))
662 if ncontrol_this <= 1:
663 logger.warning(
664 'Extrapolating GPS time information from directory context '
665 '(insufficient number of GPS timestamps in file: "%s").' % fn)
667 for i in range(nchannels):
668 if load_data:
669 arr = data_arrays[i]
670 assert arr.size == nsamples
672 if interpolation == 'sinc' and icontrol is not None:
674 ydata = num.empty(nsamples_ip, dtype=float)
675 try:
676 signal_ext.antidrift(
677 icontrol, tcontrol,
678 arr.astype(float), tmin_ip, deltat, ydata)
680 except signal_ext.Error as e:
681 e = DataCubeError(str(e))
682 e.set_context('filename', fn)
683 e.set_context('n_control_points', icontrol.size)
684 e.set_context('n_samples_raw', arr.size)
685 e.set_context('n_samples_ip', ydata.size)
686 e.set_context('tmin_ip', util.time_to_str(tmin_ip))
687 raise e
689 ydata = num.round(ydata).astype(arr.dtype)
690 else:
691 ydata = arr
693 tr_tmin = tmin_ip
694 tr_tmax = None
695 else:
696 ydata = None
697 tr_tmin = tmin_ip
698 tr_tmax = tmax_ip
700 tr = trace.Trace('', header['DEV_NO'], '', 'p%i' % i, deltat=deltat,
701 ydata=ydata, tmin=tr_tmin, tmax=tr_tmax, meta=header)
703 bleaps = num.logical_and(tmin_ip <= leaps, leaps < tmax_ip)
705 if num.any(bleaps):
706 assert num.sum(bleaps) == 1
707 tcut = leaps[bleaps][0]
709 for tmin_cut, tmax_cut in [
710 (tr.tmin, tcut), (tcut, tr.tmax+tr.deltat)]:
712 try:
713 tr_cut = tr.chop(tmin_cut, tmax_cut, inplace=False)
714 tr_cut.shift(
715 util.utc_gps_offset(0.5*(tr_cut.tmin+tr_cut.tmax)))
716 yield tr_cut
718 except trace.NoData:
719 pass
721 else:
722 tr.shift(util.utc_gps_offset(0.5*(tr.tmin+tr.tmax)))
723 yield tr
726header_keys = {
727 str: b'GIPP_V DEV_NO E_NAME GPS_PO S_TIME S_DATE DAT_NO'.split(),
728 int: b'''P_AMPL CH_NUM S_RATE D_FILT C_MODE A_CHOP F_TIME GPS_TI GPS_OF
729 A_FILT A_PHAS GPS_ON ACQ_ON V_TCXO D_VOLT E_VOLT'''.split()}
731all_header_keys = header_keys[str] + header_keys[int]
734def detect(first512):
735 s = first512
737 if len(s) < 512:
738 return False
740 if ord(s[0:1]) >> 4 != 15:
741 return False
743 n = s.find(b'\x80')
744 if n == -1:
745 n = len(s)
747 s = s[1:n]
748 s = s.replace(b'\xf0', b'')
749 s = s.replace(b';', b' ')
750 s = s.replace(b'=', b' ')
751 kvs = s.split(b' ')
753 if len([k for k in all_header_keys if k in kvs]) == 0:
754 return False
755 return True
758if __name__ == '__main__':
759 import argparse
760 parser = argparse.ArgumentParser(description='Datacube reader')
762 parser.add_argument(
763 'action', choices=['timeline', 'gnss', 'stations'],
764 help='Action')
765 parser.add_argument(
766 'files', nargs='+')
768 parser.add_argument(
769 '--loglevel', '-l',
770 choices=['critical', 'error', 'warning', 'info', 'debug'],
771 default='info',
772 help='Set logger level. Default: %(default)s')
774 args = parser.parse_args()
776 util.setup_logging('pyrocko.io.datacube', args.loglevel)
777 logging.getLogger('matplotlib.font_manager').disabled = True
779 if args.action == 'timeline':
780 plot_timeline(args.files)
782 elif args.action == 'gnss':
783 plot_gnss_location_timeline(args.files)
785 elif args.action == 'stations':
786 extract_stations(args.files)