1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import os
8import logging
9import sqlite3
10import re
11import time
13from pyrocko.io.io_common import FileLoadError
14from pyrocko import util
15from pyrocko.guts import Object, Int, List, Dict, Tuple, String
16from . import error, io
17from .model import Nut, to_kind_id, to_kind, to_codes_simple, \
18 codes_patterns_for_kind
19from .error import SquirrelError
21logger = logging.getLogger('psq.database')
23guts_prefix = 'squirrel'
26def abspath(path):
27 if not path.startswith('virtual:') and not path.startswith('client:'):
28 return os.path.abspath(path)
29 else:
30 return path
33def versiontuple(s):
34 fill = [0, 0, 0]
35 vals = [int(x) for x in s.split('.')]
36 fill[:len(vals)] = vals
37 return tuple(fill)
40class ExecuteGet1Error(SquirrelError):
41 pass
44def execute_get1(connection, sql, args=()):
45 rows = list(connection.execute(sql, args))
46 if len(rows) == 1:
47 return rows[0]
48 else:
49 raise ExecuteGet1Error('Expected database entry not found.')
52g_databases = {}
55def get_database(path):
56 path = os.path.abspath(path)
57 if path not in g_databases:
58 g_databases[path] = Database(path)
60 return g_databases[path]
63def close_database(database):
64 path = os.path.abspath(database._database_path)
65 database._conn.close()
66 if path in g_databases:
67 del g_databases[path]
70class Transaction(object):
71 def __init__(
72 self, conn,
73 label='',
74 mode='immediate',
75 retry_interval=0.1,
76 callback=None):
78 self.cursor = conn.cursor()
79 assert mode in ('deferred', 'immediate', 'exclusive')
80 self.mode = mode
81 self.depth = 0
82 self.rollback_wanted = False
83 self.retry_interval = retry_interval
84 self.callback = callback
85 self.label = label
86 self.started = False
88 def begin(self):
89 if self.depth == 0:
90 tries = 0
91 while True:
92 try:
93 tries += 1
94 self.cursor.execute('BEGIN %s' % self.mode.upper())
95 self.started = True
96 logger.debug(
97 'Transaction started: %-30s (pid: %s, mode: %s)'
98 % (self.label, os.getpid(), self.mode))
100 self.total_changes_begin \
101 = self.cursor.connection.total_changes
102 break
104 except sqlite3.OperationalError as e:
105 if not str(e) == 'database is locked':
106 raise
108 logger.info(
109 'Database is locked retrying in %s s: %s '
110 '(pid: %s, tries: %i)' % (
111 self.retry_interval, self.label,
112 os.getpid(), tries))
114 time.sleep(self.retry_interval)
116 self.depth += 1
118 def commit(self):
119 if not self.started:
120 raise Exception(
121 'Trying to commit without having started a transaction.')
123 self.depth -= 1
124 if self.depth == 0:
125 if not self.rollback_wanted:
126 self.cursor.execute('COMMIT')
127 self.started = False
128 if self.total_changes_begin is not None:
129 total_changes = self.cursor.connection.total_changes \
130 - self.total_changes_begin
131 else:
132 total_changes = None
134 if self.callback is not None and total_changes:
135 self.callback('modified', total_changes)
137 logger.debug(
138 'Transaction completed: %-30s '
139 '(pid: %s, changes: %i)' % (
140 self.label, os.getpid(), total_changes or 0))
142 else:
143 self.cursor.execute('ROLLBACK')
144 self.started = False
145 logger.warning('Deferred rollback executed.')
146 logger.debug(
147 'Transaction failed: %-30s (pid: %s)' % (
148 self.label, os.getpid()))
149 self.rollback_wanted = False
151 def rollback(self):
152 if not self.started:
153 raise Exception(
154 'Trying to rollback without having started a transaction.')
156 self.depth -= 1
157 if self.depth == 0:
158 self.cursor.execute('ROLLBACK')
159 self.started = False
161 logger.debug(
162 'Transaction failed: %-30s (pid: %s)' % (
163 self.label, os.getpid()))
165 self.rollback_wanted = False
166 else:
167 logger.warning('Deferred rollback scheduled.')
168 self.rollback_wanted = True
170 def close(self):
171 self.cursor.close()
173 def __enter__(self):
174 self.begin()
175 return self.cursor
177 def __exit__(self, exc_type, exc_value, traceback):
178 if exc_type is None:
179 self.commit()
180 else:
181 self.rollback()
183 if self.depth == 0:
184 self.close()
185 self.callback = None
188class Database(object):
189 '''
190 Shared meta-information database used by Squirrel.
191 '''
193 def __init__(self, database_path=':memory:', log_statements=False):
194 self._database_path = database_path
195 if database_path != ':memory:':
196 util.ensuredirs(database_path)
198 try:
199 logger.debug(
200 'Opening connection to database (threadsafety: %i): %s',
201 sqlite3.threadsafety,
202 database_path)
204 self._conn = sqlite3.connect(
205 database_path,
206 isolation_level=None,
207 check_same_thread=False if sqlite3.threadsafety else True)
209 except sqlite3.OperationalError:
210 raise error.SquirrelError(
211 'Cannot connect to database: %s' % database_path)
213 self._conn.text_factory = str
214 self._tables = {}
216 if log_statements:
217 self._conn.set_trace_callback(self._log_statement)
219 self._listeners = []
220 self._initialize_db()
221 self._basepath = None
223 self.version = None
225 def set_basepath(self, basepath):
226 if basepath is not None:
227 self._basepath = os.path.abspath(basepath)
228 else:
229 self._basepath = None
231 def relpath(self, path):
232 if self._basepath is not None and path.startswith(
233 self._basepath + os.path.sep):
234 return path[len(self._basepath) + 1:]
235 else:
236 return path
238 def abspath(self, path):
239 if self._basepath is not None and not path.startswith('virtual:') \
240 and not path.startswith('client:') \
241 and not os.path.isabs(path):
242 return os.path.join(self._basepath, path)
243 else:
244 return path
246 def _log_statement(self, statement):
247 logger.debug(statement)
249 def get_connection(self):
250 return self._conn
252 def transaction(self, label='', mode='immediate'):
253 return Transaction(
254 self._conn,
255 label=label,
256 mode=mode,
257 callback=self._notify_listeners)
259 def add_listener(self, listener):
260 listener_ref = util.smart_weakref(listener)
261 self._listeners.append(listener_ref)
262 return listener_ref
264 def remove_listener(self, listener_ref):
265 self._listeners.remove(listener_ref)
267 def _notify_listeners(self, event, *args):
268 dead = []
269 for listener_ref in self._listeners:
270 listener = listener_ref()
271 if listener is not None:
272 listener(event, *args)
273 else:
274 dead.append(listener_ref)
276 for listener_ref in dead:
277 self.remove_listener(listener_ref)
279 def _register_table(self, s):
280 m = re.search(r'(\S+)\s*\(([^)]+)\)', s)
281 table_name = m.group(1)
282 dtypes = m.group(2)
283 table_header = []
284 for dele in dtypes.split(','):
285 table_header.append(dele.split()[:2])
287 self._tables[table_name] = table_header
289 return s
291 def _initialize_db(self):
292 with self.transaction('initialize') as cursor:
293 cursor.execute(
294 '''PRAGMA recursive_triggers = true''')
296 cursor.execute(
297 '''PRAGMA busy_timeout = 30000''')
299 if 2 == len(list(
300 cursor.execute(
301 '''
302 SELECT name FROM sqlite_master
303 WHERE type = 'table' AND name IN (
304 'files',
305 'persistent')
306 '''))):
308 try:
309 self.version = versiontuple(execute_get1(
310 cursor,
311 '''
312 SELECT value FROM settings
313 WHERE key == "version"
314 ''')[0])
315 except sqlite3.OperationalError:
316 raise error.SquirrelError(
317 'Squirrel database in pre-release format found: %s\n'
318 'Please remove the database file and reindex.'
319 % self._database_path)
321 if self.version >= (1, 1, 0):
322 raise error.SquirrelError(
323 'Squirrel database "%s" is of version %i.%i.%i which '
324 'is not supported by this version of Pyrocko. Please '
325 'upgrade the Pyrocko library.'
326 % ((self._database_path, ) + self.version))
328 return
330 cursor.execute(self._register_table(
331 '''
332 CREATE TABLE IF NOT EXISTS settings (
333 key text PRIMARY KEY,
334 value text)
335 '''))
337 cursor.execute(
338 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")')
340 self.version = execute_get1(
341 cursor,
342 'SELECT value FROM settings WHERE key == "version"')
344 cursor.execute(self._register_table(
345 '''
346 CREATE TABLE IF NOT EXISTS files (
347 file_id integer PRIMARY KEY,
348 path text,
349 format text,
350 mtime float,
351 size integer)
352 '''))
354 cursor.execute(
355 '''
356 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path
357 ON files (path)
358 ''')
360 cursor.execute(self._register_table(
361 '''
362 CREATE TABLE IF NOT EXISTS nuts (
363 nut_id integer PRIMARY KEY AUTOINCREMENT,
364 file_id integer,
365 file_segment integer,
366 file_element integer,
367 kind_id integer,
368 kind_codes_id integer,
369 tmin_seconds integer,
370 tmin_offset integer,
371 tmax_seconds integer,
372 tmax_offset integer,
373 kscale integer)
374 '''))
376 cursor.execute(
377 '''
378 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element
379 ON nuts (file_id, file_segment, file_element)
380 ''')
382 cursor.execute(self._register_table(
383 '''
384 CREATE TABLE IF NOT EXISTS kind_codes (
385 kind_codes_id integer PRIMARY KEY,
386 kind_id integer,
387 codes text,
388 deltat float)
389 '''))
391 cursor.execute(
392 '''
393 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes
394 ON kind_codes (kind_id, codes, deltat)
395 ''')
397 cursor.execute(self._register_table(
398 '''
399 CREATE TABLE IF NOT EXISTS kind_codes_count (
400 kind_codes_id integer PRIMARY KEY,
401 count integer)
402 '''))
404 cursor.execute(
405 '''
406 CREATE INDEX IF NOT EXISTS index_nuts_file_id
407 ON nuts (file_id)
408 ''')
410 cursor.execute(
411 '''
412 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file
413 BEFORE DELETE ON files FOR EACH ROW
414 BEGIN
415 DELETE FROM nuts where file_id == old.file_id;
416 END
417 ''')
419 # trigger only on size to make silent update of mtime possible
420 cursor.execute(
421 '''
422 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file
423 BEFORE UPDATE OF size ON files FOR EACH ROW
424 BEGIN
425 DELETE FROM nuts where file_id == old.file_id;
426 END
427 ''')
429 cursor.execute(
430 '''
431 CREATE TRIGGER IF NOT EXISTS increment_kind_codes
432 BEFORE INSERT ON nuts FOR EACH ROW
433 BEGIN
434 INSERT OR IGNORE INTO kind_codes_count
435 VALUES (new.kind_codes_id, 0);
436 UPDATE kind_codes_count
437 SET count = count + 1
438 WHERE new.kind_codes_id == kind_codes_id;
439 END
440 ''')
442 cursor.execute(
443 '''
444 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes
445 BEFORE DELETE ON nuts FOR EACH ROW
446 BEGIN
447 UPDATE kind_codes_count
448 SET count = count - 1
449 WHERE old.kind_codes_id == kind_codes_id;
450 END
451 ''')
453 cursor.execute(self._register_table(
454 '''
455 CREATE TABLE IF NOT EXISTS persistent (
456 name text UNIQUE)
457 '''))
459 def dig(self, nuts, transaction=None):
460 '''
461 Store or update content meta-information.
463 Given ``nuts`` are assumed to represent an up-to-date and complete
464 inventory of a set of files. Any old information about these files is
465 first pruned from the database (via database triggers). If such content
466 is part of a live selection, it is also removed there. Then the new
467 content meta-information is inserted into the main database. The
468 content is not automatically inserted into the live selections again.
469 It is in the responsibility of the selection object to perform this
470 step.
471 '''
473 nuts = list(nuts)
475 if not nuts:
476 return
478 files = set()
479 kind_codes = set()
480 for nut in nuts:
481 files.add((
482 self.relpath(nut.file_path),
483 nut.file_format,
484 nut.file_mtime,
485 nut.file_size))
486 kind_codes.add(
487 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0))
489 with (transaction or self.transaction('dig')) as c:
491 c.executemany(
492 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files)
494 c.executemany(
495 '''UPDATE files SET
496 format = ?, mtime = ?, size = ?
497 WHERE path == ?
498 ''',
499 ((x[1], x[2], x[3], x[0]) for x in files))
501 c.executemany(
502 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)',
503 kind_codes)
505 c.executemany(
506 '''
507 INSERT INTO nuts VALUES
508 (NULL, (
509 SELECT file_id FROM files
510 WHERE path == ?
511 ),?,?,?,
512 (
513 SELECT kind_codes_id FROM kind_codes
514 WHERE kind_id == ? AND codes == ? AND deltat == ?
515 ), ?,?,?,?,?)
516 ''',
517 ((self.relpath(nut.file_path),
518 nut.file_segment, nut.file_element,
519 nut.kind_id,
520 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0,
521 nut.tmin_seconds, nut.tmin_offset,
522 nut.tmax_seconds, nut.tmax_offset,
523 nut.kscale) for nut in nuts))
525 def undig(self, path):
527 path = self.relpath(abspath(path))
529 sql = '''
530 SELECT
531 files.path,
532 files.format,
533 files.mtime,
534 files.size,
535 nuts.file_segment,
536 nuts.file_element,
537 kind_codes.kind_id,
538 kind_codes.codes,
539 nuts.tmin_seconds,
540 nuts.tmin_offset,
541 nuts.tmax_seconds,
542 nuts.tmax_offset,
543 kind_codes.deltat
544 FROM files
545 INNER JOIN nuts ON files.file_id = nuts.file_id
546 INNER JOIN kind_codes
547 ON nuts.kind_codes_id == kind_codes.kind_codes_id
548 WHERE path == ?
549 '''
551 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:])
552 for row in self._conn.execute(sql, (path,))]
554 def undig_all(self):
555 sql = '''
556 SELECT
557 files.path,
558 files.format,
559 files.mtime,
560 files.size,
561 nuts.file_segment,
562 nuts.file_element,
563 kind_codes.kind_id,
564 kind_codes.codes,
565 nuts.tmin_seconds,
566 nuts.tmin_offset,
567 nuts.tmax_seconds,
568 nuts.tmax_offset,
569 kind_codes.deltat
570 FROM files
571 INNER JOIN nuts ON files.file_id == nuts.file_id
572 INNER JOIN kind_codes
573 ON nuts.kind_codes_id == kind_codes.kind_codes_id
574 '''
576 nuts = []
577 path = None
578 for values in self._conn.execute(sql):
579 if path is not None and values[0] != path:
580 yield path, nuts
581 nuts = []
583 path = self.abspath(values[0])
585 if values[1] is not None:
586 nuts.append(Nut(values_nocheck=(path,) + values[1:]))
588 if path is not None:
589 yield path, nuts
591 def undig_few(self, paths, format='detect'):
592 for path in paths:
593 nuts = self.undig(path)
594 if nuts:
595 yield (nuts[0].file_format, path), nuts
596 else:
597 yield (format, path), []
599 def undig_many(self, paths, show_progress=True):
600 selection = self.new_selection(paths, show_progress=show_progress)
602 for (_, path), nuts in selection.undig_grouped():
603 yield path, nuts
605 del selection
607 def new_selection(self, paths=None, format='detect', show_progress=True):
608 from .selection import Selection
609 selection = Selection(self)
610 if paths:
611 selection.add(paths, format=format, show_progress=show_progress)
612 return selection
614 def undig_content(self, nut):
615 return None
617 def remove(self, path):
618 '''
619 Prune content meta-information about a given file.
621 All content pieces belonging to file ``path`` are removed from the
622 main database and any attached live selections (via database triggers).
623 '''
625 path = self.relpath(abspath(path))
627 with self.transaction('remove file') as cursor:
628 cursor.execute(
629 'DELETE FROM files WHERE path = ?', (path,))
631 def remove_glob(self, pattern):
632 '''
633 Prune content meta-information about files matching given pattern.
635 All content pieces belonging to files who's pathes match the given
636 ``pattern`` are removed from the main database and any attached live
637 selections (via database triggers).
638 '''
640 with self.transaction('remove file glob') as cursor:
641 return cursor.execute(
642 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount
644 def _remove_volatile(self):
645 '''
646 Prune leftover volatile content from database.
648 If the cleanup handler of an attached selection is not called, e.g. due
649 to a crash or terminated process, volatile content will not be removed
650 properly. This method will delete such leftover entries.
652 This is a mainenance operatation which should only be called when no
653 apps are using the database because it would remove volatile content
654 currently used by the apps.
655 '''
657 with self.transaction('remove volatile') as cursor:
658 return cursor.execute(
659 '''
660 DELETE FROM files
661 WHERE path LIKE 'virtual:volatile:%'
662 ''').rowcount
664 def reset(self, path, transaction=None):
665 '''
666 Prune information associated with a given file, but keep the file path.
668 This method is called when reading a file failed. File attributes,
669 format, size and modification time are set to NULL. File content
670 meta-information is removed from the database and any attached live
671 selections (via database triggers).
672 '''
674 path = self.relpath(abspath(path))
676 with (transaction or self.transaction('reset file')) as cursor:
677 cursor.execute(
678 '''
679 UPDATE files SET
680 format = NULL,
681 mtime = NULL,
682 size = NULL
683 WHERE path = ?
684 ''', (path,))
686 def silent_touch(self, path):
687 '''
688 Update modification time of file without initiating reindexing.
690 Useful to prolong validity period of data with expiration date.
691 '''
693 apath = abspath(path)
694 path = self.relpath(apath)
696 with self.transaction('silent touch') as cursor:
698 sql = 'SELECT format, size FROM files WHERE path = ?'
699 fmt, size = execute_get1(cursor, sql, (path,))
701 mod = io.get_backend(fmt)
702 mod.touch(apath)
703 file_stats = mod.get_stats(apath)
705 if file_stats[1] != size:
706 raise FileLoadError(
707 'Silent update for file "%s" failed: size has changed.'
708 % apath)
710 sql = '''
711 UPDATE files
712 SET mtime = ?
713 WHERE path = ?
714 '''
715 cursor.execute(sql, (file_stats[0], path))
717 def _iter_codes_info(
718 self, kind=None, codes=None, kind_codes_count='kind_codes_count'):
720 args = []
721 sel = ''
722 if kind is not None:
723 kind_id = to_kind_id(kind)
725 sel = 'AND kind_codes.kind_id == ?'
726 args.append(to_kind_id(kind))
728 if codes is not None:
729 assert kind is not None # TODO supp by recursing possible kinds
730 kind_id = to_kind_id(kind)
731 pats = codes_patterns_for_kind(kind_id, codes)
733 if pats:
734 # could optimize this by using IN for non-patterns
735 sel += ' AND ( %s ) ' % ' OR '.join(
736 ('kind_codes.codes GLOB ?',) * len(pats))
738 args.extend(pat.safe_str for pat in pats)
740 sql = ('''
741 SELECT
742 kind_codes.kind_id,
743 kind_codes.codes,
744 kind_codes.deltat,
745 kind_codes.kind_codes_id,
746 %(kind_codes_count)s.count
747 FROM %(kind_codes_count)s
748 INNER JOIN kind_codes
749 ON %(kind_codes_count)s.kind_codes_id
750 == kind_codes.kind_codes_id
751 WHERE %(kind_codes_count)s.count > 0
752 ''' + sel + '''
753 ''') % {'kind_codes_count': kind_codes_count}
755 for kind_id, scodes, deltat, kcid, count in self._conn.execute(
756 sql, args):
758 yield (
759 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count)
761 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'):
762 args = []
763 sel = ''
764 if kind is not None:
765 assert isinstance(kind, str)
766 sel = 'AND kind_codes.kind_id == ?'
767 args.append(to_kind_id(kind))
769 sql = ('''
770 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s
771 INNER JOIN kind_codes
772 ON %(kind_codes_count)s.kind_codes_id
773 == kind_codes.kind_codes_id
774 WHERE %(kind_codes_count)s.count > 0
775 ''' + sel + '''
776 ORDER BY kind_codes.deltat
777 ''') % {'kind_codes_count': kind_codes_count}
779 for row in self._conn.execute(sql, args):
780 yield row[0]
782 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'):
783 args = []
784 sel = ''
785 if kind is not None:
786 assert isinstance(kind, str)
787 sel = 'AND kind_codes.kind_id == ?'
788 args.append(to_kind_id(kind))
790 sql = ('''
791 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes
792 FROM %(kind_codes_count)s
793 INNER JOIN kind_codes
794 ON %(kind_codes_count)s.kind_codes_id
795 == kind_codes.kind_codes_id
796 WHERE %(kind_codes_count)s.count > 0
797 ''' + sel + '''
798 ORDER BY kind_codes.codes
800 ''') % dict(kind_codes_count=kind_codes_count)
802 for row in self._conn.execute(sql, args):
803 yield to_codes_simple(*row)
805 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'):
806 args = []
807 sel = ''
808 if codes is not None:
809 sel = 'AND kind_codes.codes == ?'
810 args.append(codes.safe_str)
812 sql = ('''
813 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s
814 INNER JOIN kind_codes
815 ON %(kind_codes_count)s.kind_codes_id
816 == kind_codes.kind_codes_id
817 WHERE %(kind_codes_count)s.count > 0
818 ''' + sel + '''
819 ORDER BY kind_codes.kind_id
820 ''') % {'kind_codes_count': kind_codes_count}
822 for row in self._conn.execute(sql, args):
823 yield to_kind(row[0])
825 def iter_paths(self):
826 for row in self._conn.execute('''SELECT path FROM files'''):
827 yield self.abspath(row[0])
829 def iter_nnuts_by_file(self):
830 sql = '''
831 SELECT
832 path,
833 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id)
834 FROM files
835 '''
836 for row in self._conn.execute(sql):
837 yield (self.abspath(row[0]),) + row[1:]
839 def iter_kinds(self, codes=None):
840 return self._iter_kinds(codes=codes)
842 def iter_codes(self, kind=None):
843 return self._iter_codes(kind=kind)
845 def get_paths(self):
846 return list(self.iter_paths())
848 def get_kinds(self, codes=None):
849 return list(self.iter_kinds(codes=codes))
851 def get_codes(self, kind=None):
852 return list(self.iter_codes(kind=kind))
854 def get_counts(self, kind=None):
855 d = {}
856 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
857 if kind_id not in d:
858 v = d[kind_id] = {}
859 else:
860 v = d[kind_id]
862 if codes not in v:
863 v[codes] = 0
865 v[codes] += count
867 if kind is not None:
868 return d[to_kind_id(kind)]
869 else:
870 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
872 def get_nfiles(self):
873 sql = '''SELECT COUNT(*) FROM files'''
874 for row in self._conn.execute(sql):
875 return row[0]
877 def get_nnuts(self):
878 sql = '''SELECT COUNT(*) FROM nuts'''
879 for row in self._conn.execute(sql):
880 return row[0]
882 def get_nnuts_by_file(self):
883 return list(self.iter_nnuts_by_file())
885 def get_total_size(self):
886 sql = '''
887 SELECT SUM(files.size) FROM files
888 '''
890 for row in self._conn.execute(sql):
891 return row[0] or 0
893 def get_persistent_names(self):
894 sql = '''
895 SELECT name FROM persistent
896 '''
897 return [row[0] for row in self._conn.execute(sql)]
899 def get_stats(self):
900 return DatabaseStats(
901 nfiles=self.get_nfiles(),
902 nnuts=self.get_nnuts(),
903 kinds=self.get_kinds(),
904 codes=self.get_codes(),
905 counts=self.get_counts(),
906 total_size=self.get_total_size(),
907 persistent=self.get_persistent_names())
909 def __str__(self):
910 return str(self.get_stats())
912 def print_tables(self, stream=None):
913 for table in [
914 'persistent',
915 'files',
916 'nuts',
917 'kind_codes',
918 'kind_codes_count']:
920 self.print_table(table, stream=stream)
922 def print_table(self, name, stream=None):
924 if stream is None:
925 stream = sys.stdout
927 class hstr(str):
928 def __repr__(self):
929 return self
931 w = stream.write
932 w('\n')
933 w('\n')
934 w(name)
935 w('\n')
936 sql = 'SELECT * FROM %s' % name
937 tab = []
938 if name in self._tables:
939 headers = self._tables[name]
940 tab.append([None for _ in headers])
941 tab.append([hstr(x[0]) for x in headers])
942 tab.append([hstr(x[1]) for x in headers])
943 tab.append([None for _ in headers])
945 for row in self._conn.execute(sql):
946 tab.append([x for x in row])
948 widths = [
949 max((len(repr(x)) if x is not None else 0) for x in col)
950 for col in zip(*tab)]
952 for row in tab:
953 w(' '.join(
954 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-'))
955 for (x, wid) in zip(row, widths)))
957 w('\n')
959 w('\n')
962class DatabaseStats(Object):
963 '''
964 Container to hold statistics about contents cached in meta-information db.
965 '''
967 nfiles = Int.T(
968 help='Number of files in database.')
969 nnuts = Int.T(
970 help='Number of index nuts in database.')
971 codes = List.T(
972 Tuple.T(content_t=String.T()),
973 help='Available code sequences in database, e.g. '
974 '(agency, network, station, location) for stations nuts.')
975 kinds = List.T(
976 String.T(),
977 help='Available content types in database.')
978 total_size = Int.T(
979 help='Aggregated file size [bytes] of files referenced in database.')
980 counts = Dict.T(
981 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
982 help='Breakdown of how many nuts of any content type and code '
983 'sequence are available in database, ``counts[kind][codes]``.')
984 persistent = List.T(
985 String.T(),
986 help='Names of persistent selections stored in database.')
988 def __str__(self):
989 kind_counts = dict(
990 (kind, sum(self.counts[kind].values())) for kind in self.kinds)
992 codes = [c.safe_str for c in self.codes]
994 if len(codes) > 20:
995 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \
996 + '\n [%i more]\n' % (len(codes) - 20) \
997 + util.ewrap(codes[-10:], indent=' ')
998 else:
999 scodes = '\n' + util.ewrap(codes, indent=' ') \
1000 if codes else '<none>'
1002 s = '''
1003Available codes: %s
1004Number of files: %i
1005Total size of known files: %s
1006Number of index nuts: %i
1007Available content kinds: %s
1008Persistent selections: %s''' % (
1009 scodes,
1010 self.nfiles,
1011 util.human_bytesize(self.total_size),
1012 self.nnuts,
1013 ', '.join('%s: %i' % (
1014 kind, kind_counts[kind]) for kind in sorted(self.kinds)),
1015 ', '.join(self.persistent))
1017 return s
1020__all__ = [
1021 'Database',
1022 'DatabaseStats',
1023]