Prompt seismological data access with a fluffy tail.
The infrastructure to power a seismogram browser
pyrocko.pile
is a framework for
seismological dataset access.
squirrel
Task: get hourly RMS values for a seismic record
def rms(data):
return np.sqrt(np.sum(data**2))
Oh no!
Now my script is 300 lines of code!
And sooo slow!
Task: get hourly RMS values for a seismic record
import numpy as np
from pyrocko.util import time_to_str as tts
from pyrocko.squirrel import Squirrel
def rms(data):
return np.sqrt(np.sum(data**2))
sq = Squirrel()
sq.add('data/2011-2016')
for batch in sq.chopper_waveforms(
tinc=3600., want_incomplete=False, snap_window=True):
for tr in batch.traces:
print(tr.str_codes, tts(tr.tmin), rms(tr.ydata))
The Squirrel is 10.000 lines of code.
😱
Is it justified?
To replace 300 lines of script code
with this vast complexity?
Task: download data for moment tensor inversion, prepare data and deconvolve instrument response, run moment tensor inversion.
Functional workflow is better for incremental updates.
from pyrocko import squirrel
sq = squirrel.Squirrel()
from pyrocko import squirrel
sq = squirrel.Squirrel()
sq.add(['data.mseed', 'stations.xml']) # content indexed
sq.add('events.txt')
sq.add(['data.mseed', 'stations.xml'])
sq.add('events.txt')
sq.get_stations() # -> list of all station objects
sq.get_station(codes='*.STA23.*.*') # -> matching station
sq.get_stations() # -> list of all station objects
sq.get_station(codes='*.STA23.*.*') # -> matching station
sq.remove('stations.xml') # only from live selection
sys.exit()
sq.remove('stations.xml') # only from live selection
sys.exit() # app quits, database persists
sq = Squirrel() # second start of app
sq.add('data.mseed')
sq = Squirrel()
sq.add('data.mseed') # checks for modifications
sq = Squirrel()
sq.add('data.mseed') # updates index as needed
sq = Squirrel()
sq.add('data.mseed', check=False) # only index if unknown
sq = Squirrel() # other app
sq.add('stations.xml') # selection is private by default
sq = Squirrel(persistent='S1') # use selection named "S1"
sq.add('data.mseed')
sq.add_fdsn('geofon', query_args={'network': 'GE'})
sq.update(tmin=tmin, tmax=tmax)
sq.add_fdsn('geofon', query_args={'network': 'GE'})
sq.update(tmin=tmin, tmax=tmax) # update for time range
sq.add_fdsn('geofon', query_args={'network': 'GE'},
expires=3600.) # expires in 1h
sq.update_waveform_promises(tmin=tmin, tmax=tmax)
traces = sq.get_waveforms(tmin=tmin, tmax=tmax)
sq.update_waveform_promises(tmin=tmin, tmax=tmax)
trs = sq.get_waveforms(tmin=tmin, tmax=tmax, codes='STA1')
sq.update_waveform_promises(tmin=tmin, tmax=tmax)
trs = sq.get_waveforms(tmin=tmin, tmax=tmax, codes='STA1')
sq.update_waveform_promises(tmin=tmin, tmax=tmax)
trs = sq.get_waveforms(tmin=tmin, tmax=tmax, codes='STA1')
from pyrocko.util import str_to_time as stt
from pyrocko.squirrel import Squirrel
sq = Squirrel()
sq.add('data/2011')
sq.add('meta')
print(sq)
stations = sq.get_stations()
print(stations[3])
channels = sq.get_channels(stations[1])
for channel in channels:
print(channel)
traces = sq.get_waveforms(
channels[0],
tmin=stt('2011-10-01 00:00:00'),
tmax=stt('2011-10-01 01:00:00'))
for tr in traces:
print(tr)
response = sq.get_response(traces[0])
print(response)
sq.snuffle()
import time
from pyrocko import util
from pyrocko.squirrel import Squirrel
days = 24. * 3600.
tmin = util.day_start(time.time())
tmax = tmin + 1*days
sq = Squirrel()
sq.add_catalog('geofon', expires=30*days)
sq.add_fdsn('bgr', query_args=dict(network='GR'))
sq.update(tmin=tmin, tmax=tmax)
sq.update_waveform_promises(codes=('*', 'BFO', '*', '*Z'))
events = sq.get_events()
for ev in events:
print(ev)
stations = sq.get_stations()
channels = sq.get_channels()
responses = sq.get_responses()
traces = sq.get_waveforms(tmin=tmin, tmax=tmin+3600.)
sq.snuffle()
https://pyrocko.org
/docs/current/topics/squirrel.html