1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import os
8import logging
9import sqlite3
10import re
11import time
12import types
13import weakref
15from pyrocko.io.io_common import FileLoadError
16from pyrocko import util
17from pyrocko.guts import Object, Int, List, Dict, Tuple, String
18from . import error, io
19from .model import Nut, to_kind_id, to_kind, to_codes_simple, \
20 codes_patterns_for_kind
21from .error import SquirrelError
23logger = logging.getLogger('psq.database')
25guts_prefix = 'squirrel'
28def abspath(path):
29 if not path.startswith('virtual:') and not path.startswith('client:'):
30 return os.path.abspath(path)
31 else:
32 return path
35def versiontuple(s):
36 fill = [0, 0, 0]
37 vals = [int(x) for x in s.split('.')]
38 fill[:len(vals)] = vals
39 return tuple(fill)
42class ExecuteGet1Error(SquirrelError):
43 pass
46def execute_get1(connection, sql, args=()):
47 rows = list(connection.execute(sql, args))
48 if len(rows) == 1:
49 return rows[0]
50 else:
51 raise ExecuteGet1Error('Expected database entry not found.')
54g_databases = {}
57def get_database(path):
58 path = os.path.abspath(path)
59 if path not in g_databases:
60 g_databases[path] = Database(path)
62 return g_databases[path]
65def close_database(database):
66 path = os.path.abspath(database._database_path)
67 database._conn.close()
68 if path in g_databases:
69 del g_databases[path]
72class Transaction(object):
73 def __init__(
74 self, conn,
75 label='',
76 mode='immediate',
77 retry_interval=0.1,
78 callback=None):
80 self.cursor = conn.cursor()
81 assert mode in ('deferred', 'immediate', 'exclusive')
82 self.mode = mode
83 self.depth = 0
84 self.rollback_wanted = False
85 self.retry_interval = retry_interval
86 self.callback = callback
87 self.label = label
88 self.started = False
90 def begin(self):
91 if self.depth == 0:
92 tries = 0
93 while True:
94 try:
95 tries += 1
96 self.cursor.execute('BEGIN %s' % self.mode.upper())
97 self.started = True
98 logger.debug(
99 'Transaction started: %-30s (pid: %s, mode: %s)'
100 % (self.label, os.getpid(), self.mode))
102 self.total_changes_begin \
103 = self.cursor.connection.total_changes
104 break
106 except sqlite3.OperationalError as e:
107 if not str(e) == 'database is locked':
108 raise
110 logger.info(
111 'Database is locked retrying in %s s: %s '
112 '(pid: %s, tries: %i)' % (
113 self.retry_interval, self.label,
114 os.getpid(), tries))
116 time.sleep(self.retry_interval)
118 self.depth += 1
120 def commit(self):
121 if not self.started:
122 raise Exception(
123 'Trying to commit without having started a transaction.')
125 self.depth -= 1
126 if self.depth == 0:
127 if not self.rollback_wanted:
128 self.cursor.execute('COMMIT')
129 self.started = False
130 if self.total_changes_begin is not None:
131 total_changes = self.cursor.connection.total_changes \
132 - self.total_changes_begin
133 else:
134 total_changes = None
136 if self.callback is not None and total_changes:
137 self.callback('modified', total_changes)
139 logger.debug(
140 'Transaction completed: %-30s '
141 '(pid: %s, changes: %i)' % (
142 self.label, os.getpid(), total_changes or 0))
144 else:
145 self.cursor.execute('ROLLBACK')
146 self.started = False
147 logger.warning('Deferred rollback executed.')
148 logger.debug(
149 'Transaction failed: %-30s (pid: %s)' % (
150 self.label, os.getpid()))
151 self.rollback_wanted = False
153 def rollback(self):
154 if not self.started:
155 raise Exception(
156 'Trying to rollback without having started a transaction.')
158 self.depth -= 1
159 if self.depth == 0:
160 self.cursor.execute('ROLLBACK')
161 self.started = False
163 logger.debug(
164 'Transaction failed: %-30s (pid: %s)' % (
165 self.label, os.getpid()))
167 self.rollback_wanted = False
168 else:
169 logger.warning('Deferred rollback scheduled.')
170 self.rollback_wanted = True
172 def close(self):
173 self.cursor.close()
175 def __enter__(self):
176 self.begin()
177 return self.cursor
179 def __exit__(self, exc_type, exc_value, traceback):
180 if exc_type is None:
181 self.commit()
182 else:
183 self.rollback()
185 if self.depth == 0:
186 self.close()
187 self.callback = None
190class Database(object):
191 '''
192 Shared meta-information database used by Squirrel.
193 '''
195 def __init__(self, database_path=':memory:', log_statements=False):
196 self._database_path = database_path
197 if database_path != ':memory:':
198 util.ensuredirs(database_path)
200 try:
201 logger.debug('Opening connection to database: %s' % database_path)
202 self._conn = sqlite3.connect(database_path, isolation_level=None)
203 except sqlite3.OperationalError:
204 raise error.SquirrelError(
205 'Cannot connect to database: %s' % database_path)
207 self._conn.text_factory = str
208 self._tables = {}
210 if log_statements:
211 self._conn.set_trace_callback(self._log_statement)
213 self._listeners = []
214 self._initialize_db()
215 self._basepath = None
217 self.version = None
219 def set_basepath(self, basepath):
220 if basepath is not None:
221 self._basepath = os.path.abspath(basepath)
222 else:
223 self._basepath = None
225 def relpath(self, path):
226 if self._basepath is not None and path.startswith(
227 self._basepath + os.path.sep):
228 return path[len(self._basepath) + 1:]
229 else:
230 return path
232 def abspath(self, path):
233 if self._basepath is not None and not path.startswith('virtual:') \
234 and not path.startswith('client:') \
235 and not os.path.isabs(path):
236 return os.path.join(self._basepath, path)
237 else:
238 return path
240 def _log_statement(self, statement):
241 logger.debug(statement)
243 def get_connection(self):
244 return self._conn
246 def transaction(self, label='', mode='immediate'):
247 return Transaction(
248 self._conn,
249 label=label,
250 mode=mode,
251 callback=self._notify_listeners)
253 def add_listener(self, listener):
254 if isinstance(listener, types.MethodType):
255 listener_ref = weakref.WeakMethod(listener)
256 else:
257 listener_ref = weakref.ref(listener)
259 self._listeners.append(listener_ref)
260 return listener_ref
262 def remove_listener(self, listener_ref):
263 self._listeners.remove(listener_ref)
265 def _notify_listeners(self, event, *args):
266 dead = []
267 for listener_ref in self._listeners:
268 listener = listener_ref()
269 if listener is not None:
270 listener(event, *args)
271 else:
272 dead.append(listener_ref)
274 for listener_ref in dead:
275 self.remove_listener(listener_ref)
277 def _register_table(self, s):
278 m = re.search(r'(\S+)\s*\(([^)]+)\)', s)
279 table_name = m.group(1)
280 dtypes = m.group(2)
281 table_header = []
282 for dele in dtypes.split(','):
283 table_header.append(dele.split()[:2])
285 self._tables[table_name] = table_header
287 return s
289 def _initialize_db(self):
290 with self.transaction('initialize') as cursor:
291 cursor.execute(
292 '''PRAGMA recursive_triggers = true''')
294 cursor.execute(
295 '''PRAGMA busy_timeout = 30000''')
297 if 2 == len(list(
298 cursor.execute(
299 '''
300 SELECT name FROM sqlite_master
301 WHERE type = 'table' AND name IN (
302 'files',
303 'persistent')
304 '''))):
306 try:
307 self.version = versiontuple(execute_get1(
308 cursor,
309 '''
310 SELECT value FROM settings
311 WHERE key == "version"
312 ''')[0])
313 except sqlite3.OperationalError:
314 raise error.SquirrelError(
315 'Squirrel database in pre-release format found: %s\n'
316 'Please remove the database file and reindex.'
317 % self._database_path)
319 if self.version >= (1, 1, 0):
320 raise error.SquirrelError(
321 'Squirrel database "%s" is of version %i.%i.%i which '
322 'is not supported by this version of Pyrocko. Please '
323 'upgrade the Pyrocko library.'
324 % ((self._database_path, ) + self.version))
326 return
328 cursor.execute(self._register_table(
329 '''
330 CREATE TABLE IF NOT EXISTS settings (
331 key text PRIMARY KEY,
332 value text)
333 '''))
335 cursor.execute(
336 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")')
338 self.version = execute_get1(
339 cursor,
340 'SELECT value FROM settings WHERE key == "version"')
342 cursor.execute(self._register_table(
343 '''
344 CREATE TABLE IF NOT EXISTS files (
345 file_id integer PRIMARY KEY,
346 path text,
347 format text,
348 mtime float,
349 size integer)
350 '''))
352 cursor.execute(
353 '''
354 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path
355 ON files (path)
356 ''')
358 cursor.execute(self._register_table(
359 '''
360 CREATE TABLE IF NOT EXISTS nuts (
361 nut_id integer PRIMARY KEY AUTOINCREMENT,
362 file_id integer,
363 file_segment integer,
364 file_element integer,
365 kind_id integer,
366 kind_codes_id integer,
367 tmin_seconds integer,
368 tmin_offset integer,
369 tmax_seconds integer,
370 tmax_offset integer,
371 kscale integer)
372 '''))
374 cursor.execute(
375 '''
376 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element
377 ON nuts (file_id, file_segment, file_element)
378 ''')
380 cursor.execute(self._register_table(
381 '''
382 CREATE TABLE IF NOT EXISTS kind_codes (
383 kind_codes_id integer PRIMARY KEY,
384 kind_id integer,
385 codes text,
386 deltat float)
387 '''))
389 cursor.execute(
390 '''
391 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes
392 ON kind_codes (kind_id, codes, deltat)
393 ''')
395 cursor.execute(self._register_table(
396 '''
397 CREATE TABLE IF NOT EXISTS kind_codes_count (
398 kind_codes_id integer PRIMARY KEY,
399 count integer)
400 '''))
402 cursor.execute(
403 '''
404 CREATE INDEX IF NOT EXISTS index_nuts_file_id
405 ON nuts (file_id)
406 ''')
408 cursor.execute(
409 '''
410 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file
411 BEFORE DELETE ON files FOR EACH ROW
412 BEGIN
413 DELETE FROM nuts where file_id == old.file_id;
414 END
415 ''')
417 # trigger only on size to make silent update of mtime possible
418 cursor.execute(
419 '''
420 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file
421 BEFORE UPDATE OF size ON files FOR EACH ROW
422 BEGIN
423 DELETE FROM nuts where file_id == old.file_id;
424 END
425 ''')
427 cursor.execute(
428 '''
429 CREATE TRIGGER IF NOT EXISTS increment_kind_codes
430 BEFORE INSERT ON nuts FOR EACH ROW
431 BEGIN
432 INSERT OR IGNORE INTO kind_codes_count
433 VALUES (new.kind_codes_id, 0);
434 UPDATE kind_codes_count
435 SET count = count + 1
436 WHERE new.kind_codes_id == kind_codes_id;
437 END
438 ''')
440 cursor.execute(
441 '''
442 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes
443 BEFORE DELETE ON nuts FOR EACH ROW
444 BEGIN
445 UPDATE kind_codes_count
446 SET count = count - 1
447 WHERE old.kind_codes_id == kind_codes_id;
448 END
449 ''')
451 cursor.execute(self._register_table(
452 '''
453 CREATE TABLE IF NOT EXISTS persistent (
454 name text UNIQUE)
455 '''))
457 def dig(self, nuts, transaction=None):
458 '''
459 Store or update content meta-information.
461 Given ``nuts`` are assumed to represent an up-to-date and complete
462 inventory of a set of files. Any old information about these files is
463 first pruned from the database (via database triggers). If such content
464 is part of a live selection, it is also removed there. Then the new
465 content meta-information is inserted into the main database. The
466 content is not automatically inserted into the live selections again.
467 It is in the responsibility of the selection object to perform this
468 step.
469 '''
471 nuts = list(nuts)
473 if not nuts:
474 return
476 files = set()
477 kind_codes = set()
478 for nut in nuts:
479 files.add((
480 self.relpath(nut.file_path),
481 nut.file_format,
482 nut.file_mtime,
483 nut.file_size))
484 kind_codes.add(
485 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0))
487 with (transaction or self.transaction('dig')) as c:
489 c.executemany(
490 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files)
492 c.executemany(
493 '''UPDATE files SET
494 format = ?, mtime = ?, size = ?
495 WHERE path == ?
496 ''',
497 ((x[1], x[2], x[3], x[0]) for x in files))
499 c.executemany(
500 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)',
501 kind_codes)
503 c.executemany(
504 '''
505 INSERT INTO nuts VALUES
506 (NULL, (
507 SELECT file_id FROM files
508 WHERE path == ?
509 ),?,?,?,
510 (
511 SELECT kind_codes_id FROM kind_codes
512 WHERE kind_id == ? AND codes == ? AND deltat == ?
513 ), ?,?,?,?,?)
514 ''',
515 ((self.relpath(nut.file_path),
516 nut.file_segment, nut.file_element,
517 nut.kind_id,
518 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0,
519 nut.tmin_seconds, nut.tmin_offset,
520 nut.tmax_seconds, nut.tmax_offset,
521 nut.kscale) for nut in nuts))
523 def undig(self, path):
525 path = self.relpath(abspath(path))
527 sql = '''
528 SELECT
529 files.path,
530 files.format,
531 files.mtime,
532 files.size,
533 nuts.file_segment,
534 nuts.file_element,
535 kind_codes.kind_id,
536 kind_codes.codes,
537 nuts.tmin_seconds,
538 nuts.tmin_offset,
539 nuts.tmax_seconds,
540 nuts.tmax_offset,
541 kind_codes.deltat
542 FROM files
543 INNER JOIN nuts ON files.file_id = nuts.file_id
544 INNER JOIN kind_codes
545 ON nuts.kind_codes_id == kind_codes.kind_codes_id
546 WHERE path == ?
547 '''
549 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:])
550 for row in self._conn.execute(sql, (path,))]
552 def undig_all(self):
553 sql = '''
554 SELECT
555 files.path,
556 files.format,
557 files.mtime,
558 files.size,
559 nuts.file_segment,
560 nuts.file_element,
561 kind_codes.kind_id,
562 kind_codes.codes,
563 nuts.tmin_seconds,
564 nuts.tmin_offset,
565 nuts.tmax_seconds,
566 nuts.tmax_offset,
567 kind_codes.deltat
568 FROM files
569 INNER JOIN nuts ON files.file_id == nuts.file_id
570 INNER JOIN kind_codes
571 ON nuts.kind_codes_id == kind_codes.kind_codes_id
572 '''
574 nuts = []
575 path = None
576 for values in self._conn.execute(sql):
577 if path is not None and values[0] != path:
578 yield path, nuts
579 nuts = []
581 path = self.abspath(values[0])
583 if values[1] is not None:
584 nuts.append(Nut(values_nocheck=(path,) + values[1:]))
586 if path is not None:
587 yield path, nuts
589 def undig_few(self, paths, format='detect'):
590 for path in paths:
591 nuts = self.undig(path)
592 if nuts:
593 yield (nuts[0].file_format, path), nuts
594 else:
595 yield (format, path), []
597 def undig_many(self, paths, show_progress=True):
598 selection = self.new_selection(paths, show_progress=show_progress)
600 for (_, path), nuts in selection.undig_grouped():
601 yield path, nuts
603 del selection
605 def new_selection(self, paths=None, format='detect', show_progress=True):
606 from .selection import Selection
607 selection = Selection(self)
608 if paths:
609 selection.add(paths, format=format, show_progress=show_progress)
610 return selection
612 def undig_content(self, nut):
613 return None
615 def remove(self, path):
616 '''
617 Prune content meta-information about a given file.
619 All content pieces belonging to file ``path`` are removed from the
620 main database and any attached live selections (via database triggers).
621 '''
623 path = self.relpath(abspath(path))
625 with self.transaction('remove file') as cursor:
626 cursor.execute(
627 'DELETE FROM files WHERE path = ?', (path,))
629 def remove_glob(self, pattern):
630 '''
631 Prune content meta-information about files matching given pattern.
633 All content pieces belonging to files who's pathes match the given
634 ``pattern`` are removed from the main database and any attached live
635 selections (via database triggers).
636 '''
638 with self.transaction('remove file glob') as cursor:
639 return cursor.execute(
640 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount
642 def _remove_volatile(self):
643 '''
644 Prune leftover volatile content from database.
646 If the cleanup handler of an attached selection is not called, e.g. due
647 to a crash or terminated process, volatile content will not be removed
648 properly. This method will delete such leftover entries.
650 This is a mainenance operatation which should only be called when no
651 apps are using the database because it would remove volatile content
652 currently used by the apps.
653 '''
655 with self.transaction('remove volatile') as cursor:
656 return cursor.execute(
657 '''
658 DELETE FROM files
659 WHERE path LIKE 'virtual:volatile:%'
660 ''').rowcount
662 def reset(self, path, transaction=None):
663 '''
664 Prune information associated with a given file, but keep the file path.
666 This method is called when reading a file failed. File attributes,
667 format, size and modification time are set to NULL. File content
668 meta-information is removed from the database and any attached live
669 selections (via database triggers).
670 '''
672 path = self.relpath(abspath(path))
674 with (transaction or self.transaction('reset file')) as cursor:
675 cursor.execute(
676 '''
677 UPDATE files SET
678 format = NULL,
679 mtime = NULL,
680 size = NULL
681 WHERE path = ?
682 ''', (path,))
684 def silent_touch(self, path):
685 '''
686 Update modification time of file without initiating reindexing.
688 Useful to prolong validity period of data with expiration date.
689 '''
691 apath = abspath(path)
692 path = self.relpath(apath)
694 with self.transaction('silent touch') as cursor:
696 sql = 'SELECT format, size FROM files WHERE path = ?'
697 fmt, size = execute_get1(cursor, sql, (path,))
699 mod = io.get_backend(fmt)
700 mod.touch(apath)
701 file_stats = mod.get_stats(apath)
703 if file_stats[1] != size:
704 raise FileLoadError(
705 'Silent update for file "%s" failed: size has changed.'
706 % apath)
708 sql = '''
709 UPDATE files
710 SET mtime = ?
711 WHERE path = ?
712 '''
713 cursor.execute(sql, (file_stats[0], path))
715 def _iter_codes_info(
716 self, kind=None, codes=None, kind_codes_count='kind_codes_count'):
718 args = []
719 sel = ''
720 if kind is not None:
721 kind_id = to_kind_id(kind)
723 sel = 'AND kind_codes.kind_id == ?'
724 args.append(to_kind_id(kind))
726 if codes is not None:
727 assert kind is not None # TODO supp by recursing possible kinds
728 kind_id = to_kind_id(kind)
729 pats = codes_patterns_for_kind(kind_id, codes)
731 if pats:
732 # could optimize this by using IN for non-patterns
733 sel += ' AND ( %s ) ' % ' OR '.join(
734 ('kind_codes.codes GLOB ?',) * len(pats))
736 args.extend(pat.safe_str for pat in pats)
738 sql = ('''
739 SELECT
740 kind_codes.kind_id,
741 kind_codes.codes,
742 kind_codes.deltat,
743 kind_codes.kind_codes_id,
744 %(kind_codes_count)s.count
745 FROM %(kind_codes_count)s
746 INNER JOIN kind_codes
747 ON %(kind_codes_count)s.kind_codes_id
748 == kind_codes.kind_codes_id
749 WHERE %(kind_codes_count)s.count > 0
750 ''' + sel + '''
751 ''') % {'kind_codes_count': kind_codes_count}
753 for kind_id, scodes, deltat, kcid, count in self._conn.execute(
754 sql, args):
756 yield (
757 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count)
759 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'):
760 args = []
761 sel = ''
762 if kind is not None:
763 assert isinstance(kind, str)
764 sel = 'AND kind_codes.kind_id == ?'
765 args.append(to_kind_id(kind))
767 sql = ('''
768 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s
769 INNER JOIN kind_codes
770 ON %(kind_codes_count)s.kind_codes_id
771 == kind_codes.kind_codes_id
772 WHERE %(kind_codes_count)s.count > 0
773 ''' + sel + '''
774 ORDER BY kind_codes.deltat
775 ''') % {'kind_codes_count': kind_codes_count}
777 for row in self._conn.execute(sql, args):
778 yield row[0]
780 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'):
781 args = []
782 sel = ''
783 if kind is not None:
784 assert isinstance(kind, str)
785 sel = 'AND kind_codes.kind_id == ?'
786 args.append(to_kind_id(kind))
788 sql = ('''
789 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes
790 FROM %(kind_codes_count)s
791 INNER JOIN kind_codes
792 ON %(kind_codes_count)s.kind_codes_id
793 == kind_codes.kind_codes_id
794 WHERE %(kind_codes_count)s.count > 0
795 ''' + sel + '''
796 ORDER BY kind_codes.codes
798 ''') % dict(kind_codes_count=kind_codes_count)
800 for row in self._conn.execute(sql, args):
801 yield to_codes_simple(*row)
803 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'):
804 args = []
805 sel = ''
806 if codes is not None:
807 sel = 'AND kind_codes.codes == ?'
808 args.append(codes.safe_str)
810 sql = ('''
811 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s
812 INNER JOIN kind_codes
813 ON %(kind_codes_count)s.kind_codes_id
814 == kind_codes.kind_codes_id
815 WHERE %(kind_codes_count)s.count > 0
816 ''' + sel + '''
817 ORDER BY kind_codes.kind_id
818 ''') % {'kind_codes_count': kind_codes_count}
820 for row in self._conn.execute(sql, args):
821 yield to_kind(row[0])
823 def iter_paths(self):
824 for row in self._conn.execute('''SELECT path FROM files'''):
825 yield self.abspath(row[0])
827 def iter_nnuts_by_file(self):
828 sql = '''
829 SELECT
830 path,
831 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id)
832 FROM files
833 '''
834 for row in self._conn.execute(sql):
835 yield (self.abspath(row[0]),) + row[1:]
837 def iter_kinds(self, codes=None):
838 return self._iter_kinds(codes=codes)
840 def iter_codes(self, kind=None):
841 return self._iter_codes(kind=kind)
843 def get_paths(self):
844 return list(self.iter_paths())
846 def get_kinds(self, codes=None):
847 return list(self.iter_kinds(codes=codes))
849 def get_codes(self, kind=None):
850 return list(self.iter_codes(kind=kind))
852 def get_counts(self, kind=None):
853 d = {}
854 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
855 if kind_id not in d:
856 v = d[kind_id] = {}
857 else:
858 v = d[kind_id]
860 if codes not in v:
861 v[codes] = 0
863 v[codes] += count
865 if kind is not None:
866 return d[to_kind_id(kind)]
867 else:
868 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
870 def get_nfiles(self):
871 sql = '''SELECT COUNT(*) FROM files'''
872 for row in self._conn.execute(sql):
873 return row[0]
875 def get_nnuts(self):
876 sql = '''SELECT COUNT(*) FROM nuts'''
877 for row in self._conn.execute(sql):
878 return row[0]
880 def get_nnuts_by_file(self):
881 return list(self.iter_nnuts_by_file())
883 def get_total_size(self):
884 sql = '''
885 SELECT SUM(files.size) FROM files
886 '''
888 for row in self._conn.execute(sql):
889 return row[0] or 0
891 def get_persistent_names(self):
892 sql = '''
893 SELECT name FROM persistent
894 '''
895 return [row[0] for row in self._conn.execute(sql)]
897 def get_stats(self):
898 return DatabaseStats(
899 nfiles=self.get_nfiles(),
900 nnuts=self.get_nnuts(),
901 kinds=self.get_kinds(),
902 codes=self.get_codes(),
903 counts=self.get_counts(),
904 total_size=self.get_total_size(),
905 persistent=self.get_persistent_names())
907 def __str__(self):
908 return str(self.get_stats())
910 def print_tables(self, stream=None):
911 for table in [
912 'persistent',
913 'files',
914 'nuts',
915 'kind_codes',
916 'kind_codes_count']:
918 self.print_table(table, stream=stream)
920 def print_table(self, name, stream=None):
922 if stream is None:
923 stream = sys.stdout
925 class hstr(str):
926 def __repr__(self):
927 return self
929 w = stream.write
930 w('\n')
931 w('\n')
932 w(name)
933 w('\n')
934 sql = 'SELECT * FROM %s' % name
935 tab = []
936 if name in self._tables:
937 headers = self._tables[name]
938 tab.append([None for _ in headers])
939 tab.append([hstr(x[0]) for x in headers])
940 tab.append([hstr(x[1]) for x in headers])
941 tab.append([None for _ in headers])
943 for row in self._conn.execute(sql):
944 tab.append([x for x in row])
946 widths = [
947 max((len(repr(x)) if x is not None else 0) for x in col)
948 for col in zip(*tab)]
950 for row in tab:
951 w(' '.join(
952 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-'))
953 for (x, wid) in zip(row, widths)))
955 w('\n')
957 w('\n')
960class DatabaseStats(Object):
961 '''
962 Container to hold statistics about contents cached in meta-information db.
963 '''
965 nfiles = Int.T(
966 help='Number of files in database.')
967 nnuts = Int.T(
968 help='Number of index nuts in database.')
969 codes = List.T(
970 Tuple.T(content_t=String.T()),
971 help='Available code sequences in database, e.g. '
972 '(agency, network, station, location) for stations nuts.')
973 kinds = List.T(
974 String.T(),
975 help='Available content types in database.')
976 total_size = Int.T(
977 help='Aggregated file size [bytes] of files referenced in database.')
978 counts = Dict.T(
979 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
980 help='Breakdown of how many nuts of any content type and code '
981 'sequence are available in database, ``counts[kind][codes]``.')
982 persistent = List.T(
983 String.T(),
984 help='Names of persistent selections stored in database.')
986 def __str__(self):
987 kind_counts = dict(
988 (kind, sum(self.counts[kind].values())) for kind in self.kinds)
990 codes = [c.safe_str for c in self.codes]
992 if len(codes) > 20:
993 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \
994 + '\n [%i more]\n' % (len(codes) - 20) \
995 + util.ewrap(codes[-10:], indent=' ')
996 else:
997 scodes = '\n' + util.ewrap(codes, indent=' ') \
998 if codes else '<none>'
1000 s = '''
1001Available codes: %s
1002Number of files: %i
1003Total size of known files: %s
1004Number of index nuts: %i
1005Available content kinds: %s
1006Persistent selections: %s''' % (
1007 scodes,
1008 self.nfiles,
1009 util.human_bytesize(self.total_size),
1010 self.nnuts,
1011 ', '.join('%s: %i' % (
1012 kind, kind_counts[kind]) for kind in sorted(self.kinds)),
1013 ', '.join(self.persistent))
1015 return s
1018__all__ = [
1019 'Database',
1020 'DatabaseStats',
1021]