# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------
# python 2/3
from __future__ import absolute_import

import os
import math
import logging

import numpy as num

from pyrocko import trace, util, plot
from pyrocko.guts import Object, Int, List, String, Timestamp

from . import io_common

logger = logging.getLogger(__name__)

N_GPS_TAGS_WANTED = 200  # must match definition in datacube_ext.c

APPLY_SUBSAMPLE_SHIFT_CORRECTION = True


def color(c):
    c = plot.color(c)
    return tuple(x/255. for x in c)


class DataCubeError(io_common.FileLoadError):
    pass


class ControlPointError(Exception):
    pass


def make_control_point(ipos_block, t_block, tref, deltat):
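    '''
    Fit a (sample index, absolute time) control point to a block of GPS tags.

    Works on reduced times (zero clock drift would give a horizontal line),
    with two rounds of interquartile outlier rejection and a detrending step
    in between. Raises :py:exc:`ControlPointError` if too few tags survive
    the QC or if the fit uncertainties exceed hard-coded limits.
    '''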
    # reduce time (no drift would mean straight line)
    tred = (t_block - tref) - ipos_block*deltat

    # first round, remove outliers
    q25, q75 = num.percentile(tred, (25., 75.))
    iok = num.logical_and(q25 <= tred, tred <= q75)

    # detrend
    slope, offset = num.polyfit(ipos_block[iok], tred[iok], 1)
    tred2 = tred - (offset + slope * ipos_block)

    # second round, remove outliers based on detrended tred, refit
    q25, q75 = num.percentile(tred2, (25., 75.))
    iok = num.logical_and(q25 <= tred2, tred2 <= q75)
    x = ipos_block[iok].copy()
    ipos0 = x[0]
    x -= ipos0
    y = tred[iok].copy()
    if x.size < 2:
        raise ControlPointError(
            'Insufficient number of control points after QC.')

    elif x.size < 5:  # needed for older numpy versions
        (slope, offset) = num.polyfit(x, y, 1)
    else:
        (slope, offset), cov = num.polyfit(x, y, 1, cov=True)

        slope_err, offset_err = num.sqrt(num.diag(cov))

        slope_err_limit = 1.0e-10
        if ipos_block.size < N_GPS_TAGS_WANTED // 2:
            slope_err_limit *= (200. / ipos_block.size)

        offset_err_limit = 5.0e-3

        if slope_err > slope_err_limit:
            raise ControlPointError(
                'Slope error too large: %g (limit: %g)' % (
                    slope_err, slope_err_limit))

        if offset_err > offset_err_limit:
            raise ControlPointError(
                'Offset error too large: %g (limit: %g)' % (
                    offset_err, offset_err_limit))

    ic = ipos_block[ipos_block.size//2]
    tc = offset + slope * (ic - ipos0)

    return ic, tc + ic * deltat + tref


def analyse_gps_tags(header, gps_tags, offset, nsamples):
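    '''
    Turn the GPS tags of a recording into time control points.

    Discards implausible tags, fits control points blockwise with
    :py:func:`make_control_point` and returns ``(tmin, tmax, icontrol,
    tcontrol, ok)``. If no usable control points can be constructed, falls
    back to the start time given in the file header.
    '''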
    ipos, t, fix, nsvs = gps_tags
    deltat = 1.0 / int(header['S_RATE'])

    tquartz = offset + ipos * deltat

    toff = t - tquartz
    toff_median = num.median(toff)

    n = t.size

    dtdt = (t[1:n] - t[0:n-1]) / (tquartz[1:n] - tquartz[0:n-1])

    ok = abs(toff_median - toff) < 10.

    xok = num.abs(dtdt - 1.0) < 0.00001

    if ok.size >= 1:

        ok[0] = False
        ok[1:n] &= xok
        ok[0:n-1] &= xok
        ok[n-1] = False

    ipos = ipos[ok]
    t = t[ok]
    fix = fix[ok]
    nsvs = nsvs[ok]

    blocksize = N_GPS_TAGS_WANTED // 2
    if ipos.size < blocksize:
        blocksize = max(10, ipos.size // 4)
        logger.warning(
            'Small number of GPS tags found. '
            'Reducing analysis block size to %i tags. '
            'Time correction may be unreliable.' % blocksize)

    try:
        if ipos.size < blocksize:
            raise ControlPointError(
                'Cannot determine GPS time correction: '
                'Too few GPS tags found: %i' % ipos.size)

        j = 0
        control_points = []
        tref = num.median(t - ipos*deltat)
        while j < ipos.size - blocksize:
            ipos_block = ipos[j:j+blocksize]
            t_block = t[j:j+blocksize]
            try:
                ic, tc = make_control_point(ipos_block, t_block, tref, deltat)
                control_points.append((ic, tc))
            except ControlPointError as e:
                logger.debug(str(e))

            j += blocksize

        ipos_last = ipos[-blocksize:]
        t_last = t[-blocksize:]
        try:
            ic, tc = make_control_point(ipos_last, t_last, tref, deltat)
            control_points.append((ic, tc))
        except ControlPointError as e:
            logger.debug(str(e))

        if len(control_points) < 2:
            raise ControlPointError(
                'Could not safely determine time corrections from GPS: '
                'unable to construct two or more control points')

        i0, t0 = control_points[0]
        i1, t1 = control_points[1]
        i2, t2 = control_points[-2]
        i3, t3 = control_points[-1]
        if len(control_points) == 2:
            tmin = t0 - i0 * deltat - offset * deltat
            tmax = t3 + (nsamples - i3 - 1) * deltat
        else:
            icontrol = num.array(
                [x[0] for x in control_points], dtype=num.int64)
            tcontrol = num.array(
                [x[1] for x in control_points], dtype=float)
            # robust against steps:
            slope = num.median(
                (tcontrol[1:] - tcontrol[:-1])
                / (icontrol[1:] - icontrol[:-1]))

            tmin = t0 + (offset - i0) * slope
            tmax = t2 + (offset + nsamples - 1 - i2) * slope

        if offset < i0:
            control_points[0:0] = [(offset, tmin)]

        if offset + nsamples - 1 > i3:
            control_points.append((offset + nsamples - 1, tmax))

        icontrol = num.array([x[0] for x in control_points], dtype=num.int64)
        tcontrol = num.array([x[1] for x in control_points], dtype=float)

        # corrected 2021-10-26: This sub-sample time shift introduced by the
        # Cube's ADC was previously not recognized.
        if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
            tcontrol -= 0.199 * deltat + 0.0003

        return tmin, tmax, icontrol, tcontrol, ok

    except ControlPointError as e:
        logger.error(str(e))

        tmin = util.str_to_time(header['S_DATE'] + header['S_TIME'],
                                format='%y/%m/%d%H:%M:%S')

        idat = int(header['DAT_NO'])
        if idat == 0:
            tmin = tmin + util.gps_utc_offset(tmin)
        else:
            tmin = util.day_start(tmin + idat * 24.*3600.) \
                + util.gps_utc_offset(tmin)

        tmax = tmin + (nsamples - 1) * deltat
        icontrol = num.array([offset, offset+nsamples - 1], dtype=num.int64)
        tcontrol = num.array([tmin, tmax])
        return tmin, tmax, icontrol, tcontrol, ok


def plot_gnss_location_timeline(fns):
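    '''
    Plot time series of northing, easting, elevation and number of satellites
    from the GPS tags of the given DataCube files (shown interactively with
    matplotlib).
    '''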
    from matplotlib import pyplot as plt
    from pyrocko.orthodrome import latlon_to_ne_numpy
    not_ = num.logical_not
    h = 3600.

    fig = plt.figure()

    axes = []
    for i in range(4):
        axes.append(
            fig.add_subplot(4, 1, i+1, sharex=axes[-1] if axes else None))

    background_colors = [
        color('aluminium1'),
        color('aluminium2')]

    tref = None
    for ifn, fn in enumerate(fns):
        header, gps_tags, nsamples = get_time_infos(fn)
        _, t, fix, nsvs, lats, lons, elevations, _ = gps_tags

        fix = fix.astype(bool)

        if t.size < 2:
            logger.warning('Need at least 2 gps tags for plotting: %s' % fn)
            continue  # skip this file: not enough tags to plot

        if tref is None:
            tref = util.day_start(t[0])
            lat, lon, elevation = coordinates_from_gps(gps_tags)

        norths, easts = latlon_to_ne_numpy(lat, lon, lats, lons)

        for ax, data in zip(axes, (norths, easts, elevations, nsvs)):
            tspan = t[num.array([0, -1])]

            ax.axvspan(*((tspan - tref) / h), color=background_colors[ifn % 2])
            med = num.median(data)
            ax.plot(
                (tspan - tref) / h,
                [med, med],
                ls='--',
                c='k',
                lw=3,
                alpha=0.5)

            ax.plot(
                (t[not_(fix)] - tref) / h, data[not_(fix)], 'o',
                ms=1.5,
                mew=0,
                color=color('scarletred2'))

            ax.plot(
                (t[fix] - tref) / h, data[fix], 'o',
                ms=1.5,
                mew=0,
                color=color('aluminium6'))

    for ax in axes:
        ax.grid(alpha=.3)

    ax_lat, ax_lon, ax_elev, ax_nsv = axes

    ax_lat.set_ylabel('Northing [m]')
    ax_lon.set_ylabel('Easting [m]')
    ax_elev.set_ylabel('Elevation [m]')
    ax_nsv.set_ylabel('Number of Satellites')

    ax_lat.get_xaxis().set_tick_params(labelbottom=False)
    ax_lon.get_xaxis().set_tick_params(labelbottom=False)
    ax_nsv.set_xlabel(
        'Hours after %s' % util.time_to_str(tref, format='%Y-%m-%d'))

    fig.suptitle(
        u'Lat: %.5f\u00b0 Lon: %.5f\u00b0 Elevation: %g m' % (
            lat, lon, elevation))

    plt.show()


def coordinates_from_gps(gps_tags):
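    '''
    Return the median latitude, longitude and elevation over all GPS tags.
    '''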
    ipos, t, fix, nsvs, lats, lons, elevations, temps = gps_tags
    return tuple(num.median(x) for x in (lats, lons, elevations))


def extract_stations(fns):
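    '''
    Write a station file for the given DataCube files to standard output,
    with coordinates taken from the GPS tags and station names from the
    file extensions.
    '''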
    import io
    import sys
    from pyrocko.model import Station
    from pyrocko.guts import dump_all

    stations = {}

    for fn in fns:
        sta_name = os.path.splitext(fn)[1].lstrip('.')
        if sta_name in stations:
            logger.warning('Cube %s already in list!', sta_name)
            continue

        header, gps_tags, nsamples = get_time_infos(fn)

        lat, lon, elevation = coordinates_from_gps(gps_tags)

        sta = Station(
            network='',
            station=sta_name,
            name=sta_name,
            location='',
            lat=lat,
            lon=lon,
            elevation=elevation)

        stations[sta_name] = sta

    f = io.BytesIO()
    dump_all(stations.values(), stream=f)
    sys.stdout.write(f.getvalue().decode())


def plot_timeline(fns):
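    '''
    Plot the GPS time drift and the fitted time correction for DataCube
    recordings. ``fns`` may be a list of files, a single file or a directory
    (shown interactively with matplotlib).
    '''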
    from matplotlib import pyplot as plt

    fig = plt.figure()
    axes = fig.gca()

    h = 3600.

    if isinstance(fns, str):
        fn = fns
        if os.path.isdir(fn):
            fns = [
                os.path.join(fn, entry) for entry in sorted(os.listdir(fn))]

            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_timing_context(fns)

        else:
            ipos, t, fix, nsvs, header, offset, nsamples = \
                get_extended_timing_context(fn)

    else:
        ipos, t, fix, nsvs, header, offset, nsamples = \
            get_timing_context(fns)

    deltat = 1.0 / int(header['S_RATE'])

    tref = num.median(t - ipos * deltat)
    tref = round(tref / deltat) * deltat

    if APPLY_SUBSAMPLE_SHIFT_CORRECTION:
        tcorr = 0.199 * deltat + 0.0003
    else:
        tcorr = 0.0

    x = ipos*deltat
    y = (t - tref) - ipos*deltat - tcorr

    bfix = fix != 0
    bnofix = fix == 0

    tmin, tmax, icontrol, tcontrol, ok = analyse_gps_tags(
        header, (ipos, t, fix, nsvs), offset, nsamples)

    la = num.logical_and
    nok = num.logical_not(ok)

    axes.plot(
        x[la(bfix, ok)]/h, y[la(bfix, ok)], '+',
        ms=5, color=color('chameleon3'))
    axes.plot(
        x[la(bfix, nok)]/h, y[la(bfix, nok)], '+',
        ms=5, color=color('aluminium4'))

    axes.plot(
        x[la(bnofix, ok)]/h, y[la(bnofix, ok)], 'x',
        ms=5, color=color('chocolate3'))
    axes.plot(
        x[la(bnofix, nok)]/h, y[la(bnofix, nok)], 'x',
        ms=5, color=color('aluminium4'))

    tred = tcontrol - icontrol*deltat - tref
    axes.plot(icontrol*deltat/h, tred, color=color('aluminium6'))
    axes.plot(icontrol*deltat/h, tred, 'o', ms=5, color=color('aluminium6'))

    ymin = (math.floor(tred.min() / deltat)-1) * deltat
    ymax = (math.ceil(tred.max() / deltat)+1) * deltat
    # ymin = min(ymin, num.min(y))
    # ymax = max(ymax, num.max(y))

    if ymax - ymin < 1000 * deltat:
        ygrid = math.floor(tred.min() / deltat) * deltat
        while ygrid < ymax:
            axes.axhline(ygrid, color=color('aluminium4'), alpha=0.3)
            ygrid += deltat

    xmin = icontrol[0]*deltat/h
    xmax = icontrol[-1]*deltat/h
    xsize = xmax - xmin
    xmin -= xsize * 0.1
    xmax += xsize * 0.1
    axes.set_xlim(xmin, xmax)

    axes.set_ylim(ymin, ymax)

    axes.set_xlabel('Uncorrected (quartz) time [h]')
    axes.set_ylabel('Relative time correction [s]')

    plt.show()


g_dir_contexts = {}


class DirContextEntry(Object):
    path = String.T()
    tstart = Timestamp.T()
    ifile = Int.T()


class DirContext(Object):
    path = String.T()
    mtime = Timestamp.T()
    entries = List.T(DirContextEntry.T())

    def get_entry(self, fn):
        path = os.path.abspath(fn)
        for entry in self.entries:
            if entry.path == path:
                return entry

        raise Exception('entry not found')

    def iter_entries(self, fn, step=1):
        current = self.get_entry(fn)
        by_ifile = dict(
            (entry.ifile, entry) for entry in self.entries
            if entry.tstart == current.tstart)

        icurrent = current.ifile
        while True:
            icurrent += step
            try:
                yield by_ifile[icurrent]

            except KeyError:
                break


def context(fn):
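    '''
    Get directory context for all DataCube files in the directory of ``fn``.

    The context lists, per file, the recording start time and file number.
    It is cached and refreshed whenever any modification time in the
    directory changes.
    '''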
    from pyrocko import datacube_ext

    dpath = os.path.dirname(os.path.abspath(fn))
    mtimes = [os.stat(dpath)[8]]  # [8] is st_mtime

    dentries = sorted([os.path.join(dpath, f) for f in os.listdir(dpath)
                       if os.path.isfile(os.path.join(dpath, f))])
    for dentry in dentries:
        fn2 = os.path.join(dpath, dentry)
        mtimes.append(os.stat(fn2)[8])

    mtime = float(max(mtimes))

    if dpath in g_dir_contexts:
        dir_context = g_dir_contexts[dpath]
        if dir_context.mtime == mtime:
            return dir_context

        del g_dir_contexts[dpath]

    entries = []
    for dentry in dentries:
        fn2 = os.path.join(dpath, dentry)
        if not os.path.isfile(fn2):
            continue

        with open(fn2, 'rb') as f:
            first512 = f.read(512)
            if not detect(first512):
                continue

        with open(fn2, 'rb') as f:
            try:
                header, data_arrays, gps_tags, nsamples, _ = \
                    datacube_ext.load(f.fileno(), 3, 0, -1, None)

            except datacube_ext.DataCubeError as e:
                e = DataCubeError(str(e))
                e.set_context('filename', fn)
                raise e

        header = dict(header)
        entries.append(DirContextEntry(
            path=os.path.abspath(fn2),
            tstart=util.str_to_time(
                '20' + header['S_DATE'] + ' ' + header['S_TIME'],
                format='%Y/%m/%d %H:%M:%S'),
            ifile=int(header['DAT_NO'])))

    dir_context = DirContext(mtime=mtime, path=dpath, entries=entries)
    # store in module cache (the cache lookup above expects this)
    g_dir_contexts[dpath] = dir_context

    return dir_context


def get_time_infos(fn):
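    '''
    Read header, GPS tags and number of samples from a DataCube file,
    without loading the waveform data.
    '''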
    from pyrocko import datacube_ext

    with open(fn, 'rb') as f:
        try:
            header, _, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), 1, 0, -1, None)

        except datacube_ext.DataCubeError as e:
            e = DataCubeError(str(e))
            e.set_context('filename', fn)
            raise e

    return dict(header), gps_tags, nsamples


def get_timing_context(fns):
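    '''
    Aggregate the GPS tags of a sequence of DataCube files, shifting sample
    indices so that they refer to one continuous recording.
    '''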
    joined = [[], [], [], []]
    ioff = 0
    for fn in fns:
        header, gps_tags, nsamples = get_time_infos(fn)

        ipos = gps_tags[0]
        ipos += ioff

        for i in range(4):
            joined[i].append(gps_tags[i])

        ioff += nsamples

    ipos, t, fix, nsvs = [num.concatenate(x) for x in joined]

    nsamples = ioff
    return ipos, t, fix, nsvs, header, 0, nsamples


def get_extended_timing_context(fn):
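    '''
    Aggregate GPS tags from the directory context of ``fn``. If the file
    itself contains no GPS fixes, tags from neighbouring files of the same
    recording are pulled in until a fix is found in each direction.
    '''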
    c = context(fn)

    header, gps_tags, nsamples_base = get_time_infos(fn)

    ioff = 0
    aggregated = [gps_tags]

    nsamples_total = nsamples_base

    if num.sum(gps_tags[2]) == 0:
        ioff = nsamples_base
        for entry in c.iter_entries(fn, 1):
            _, gps_tags, nsamples = get_time_infos(entry.path)

            ipos = gps_tags[0]
            ipos += ioff

            aggregated.append(gps_tags)
            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

            ioff += nsamples

        ioff = 0
        for entry in c.iter_entries(fn, -1):
            _, gps_tags, nsamples = get_time_infos(entry.path)

            ioff -= nsamples

            ipos = gps_tags[0]
            ipos += ioff

            aggregated[0:0] = [gps_tags]

            nsamples_total += nsamples

            if num.sum(gps_tags[2]) > 0:
                break

    ipos, t, fix, nsvs = [num.concatenate(x) for x in zip(*aggregated)][:4]

#    return ipos, t, fix, nsvs, header, ioff, nsamples_total
    return ipos, t, fix, nsvs, header, 0, nsamples_base


def iload(fn, load_data=True, interpolation='sinc'):
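    '''
    Read a DataCube file and yield one :py:class:`pyrocko.trace.Trace` per
    channel. GPS-based time corrections are applied via sinc interpolation
    unless ``interpolation='off'``. Traces are shifted from GPS to UTC time
    and split at leap seconds.
    '''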
    from pyrocko import datacube_ext
    from pyrocko import signal_ext

    if interpolation not in ('sinc', 'off'):
        raise NotImplementedError(
            'no such interpolation method: %s' % interpolation)

    with open(fn, 'rb') as f:
        if load_data:
            loadflag = 2
        else:
            loadflag = 1

        try:
            header, data_arrays, gps_tags, nsamples, _ = datacube_ext.load(
                f.fileno(), loadflag, 0, -1, None)

        except datacube_ext.DataCubeError as e:
            e = DataCubeError(str(e))
            e.set_context('filename', fn)
            raise e

    header = dict(header)
    deltat = 1.0 / int(header['S_RATE'])
    nchannels = int(header['CH_NUM'])

    ipos, t, fix, nsvs, header_, offset_, nsamples_ = \
        get_extended_timing_context(fn)

    tmin, tmax, icontrol, tcontrol, _ = analyse_gps_tags(
        header_, (ipos, t, fix, nsvs), offset_, nsamples_)

    if icontrol is None:
        logger.warning(
            'No usable GPS timestamps found. Using datacube header '
            'information to guess time. (file: "%s")' % fn)

    tmin_ip = round(tmin / deltat) * deltat
    if interpolation != 'off':
        tmax_ip = round(tmax / deltat) * deltat
    else:
        tmax_ip = tmin_ip + (nsamples-1) * deltat

    nsamples_ip = int(round((tmax_ip - tmin_ip)/deltat)) + 1
    # to prevent problems with rounding errors:
    tmax_ip = tmin_ip + (nsamples_ip-1) * deltat

    leaps = num.array(
        [x[0] + util.gps_utc_offset(x[0]) for x in util.read_leap_seconds2()],
        dtype=float)

    if load_data and icontrol is not None:
        ncontrol_this = num.sum(
            num.logical_and(0 <= icontrol, icontrol < nsamples))

        if ncontrol_this <= 1:
            logger.warning(
                'Extrapolating GPS time information from directory context '
                '(insufficient number of GPS timestamps in file: "%s").' % fn)

    for i in range(nchannels):
        if load_data:
            arr = data_arrays[i]
            assert arr.size == nsamples

            if interpolation == 'sinc' and icontrol is not None:

                ydata = num.empty(nsamples_ip, dtype=float)
                try:
                    signal_ext.antidrift(
                        icontrol, tcontrol,
                        arr.astype(float), tmin_ip, deltat, ydata)

                except signal_ext.Error as e:
                    e = DataCubeError(str(e))
                    e.set_context('filename', fn)
                    e.set_context('n_control_points', icontrol.size)
                    e.set_context('n_samples_raw', arr.size)
                    e.set_context('n_samples_ip', ydata.size)
                    e.set_context('tmin_ip', util.time_to_str(tmin_ip))
                    raise e

                ydata = num.round(ydata).astype(arr.dtype)
            else:
                ydata = arr

            tr_tmin = tmin_ip
            tr_tmax = None
        else:
            ydata = None
            tr_tmin = tmin_ip
            tr_tmax = tmax_ip

        tr = trace.Trace('', header['DEV_NO'], '', 'p%i' % i, deltat=deltat,
                         ydata=ydata, tmin=tr_tmin, tmax=tr_tmax, meta=header)

        bleaps = num.logical_and(tmin_ip <= leaps, leaps < tmax_ip)

        if num.any(bleaps):
            assert num.sum(bleaps) == 1
            tcut = leaps[bleaps][0]

            for tmin_cut, tmax_cut in [
                    (tr.tmin, tcut), (tcut, tr.tmax+tr.deltat)]:

                try:
                    tr_cut = tr.chop(tmin_cut, tmax_cut, inplace=False)
                    tr_cut.shift(
                        util.utc_gps_offset(0.5*(tr_cut.tmin+tr_cut.tmax)))
                    yield tr_cut

                except trace.NoData:
                    pass

        else:
            tr.shift(util.utc_gps_offset(0.5*(tr.tmin+tr.tmax)))
            yield tr


header_keys = {
    str: b'GIPP_V DEV_NO E_NAME GPS_PO S_TIME S_DATE DAT_NO'.split(),
    int: b'''P_AMPL CH_NUM S_RATE D_FILT C_MODE A_CHOP F_TIME GPS_TI GPS_OF
        A_FILT A_PHAS GPS_ON ACQ_ON V_TCXO D_VOLT E_VOLT'''.split()}

all_header_keys = header_keys[str] + header_keys[int]


def detect(first512):
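    '''
    Check whether the first 512 bytes of a file look like a DataCube header.
    '''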
    s = first512

    if len(s) < 512:
        return False

    if ord(s[0:1]) >> 4 != 15:
        return False

    n = s.find(b'\x80')
    if n == -1:
        n = len(s)

    s = s[1:n]
    s = s.replace(b'\xf0', b'')
    s = s.replace(b';', b' ')
    s = s.replace(b'=', b' ')
    kvs = s.split(b' ')

    if len([k for k in all_header_keys if k in kvs]) == 0:
        return False

    return True


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Datacube reader')

    parser.add_argument(
        'action', choices=['timeline', 'gnss', 'stations'],
        help='Action')
    parser.add_argument(
        'files', nargs='+')

    parser.add_argument(
        '--loglevel', '-l',
        choices=['critical', 'error', 'warning', 'info', 'debug'],
        default='info',
        help='Set logger level. Default: %(default)s')

    args = parser.parse_args()

    util.setup_logging('pyrocko.io.datacube', args.loglevel)
    logging.getLogger('matplotlib.font_manager').disabled = True

    if args.action == 'timeline':
        plot_timeline(args.files)

    elif args.action == 'gnss':
        plot_gnss_location_timeline(args.files)

    elif args.action == 'stations':
        extract_stations(args.files)