1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import os
8import logging
9import sqlite3
10import re
11import time
12import types
13import weakref
15from pyrocko.io.io_common import FileLoadError
16from pyrocko import util
17from pyrocko.guts import Object, Int, List, Dict, Tuple, String
18from . import error, io
19from .model import Nut, to_kind_id, to_kind, to_codes_simple, \
20 codes_patterns_for_kind
21from .error import SquirrelError
23logger = logging.getLogger('psq.database')
25guts_prefix = 'squirrel'
28def abspath(path):
29 if not path.startswith('virtual:') and not path.startswith('client:'):
30 return os.path.abspath(path)
31 else:
32 return path
35def versiontuple(s):
36 fill = [0, 0, 0]
37 vals = [int(x) for x in s.split('.')]
38 fill[:len(vals)] = vals
39 return tuple(fill)
42class ExecuteGet1Error(SquirrelError):
43 pass
46def execute_get1(connection, sql, args=()):
47 rows = list(connection.execute(sql, args))
48 if len(rows) == 1:
49 return rows[0]
50 else:
51 raise ExecuteGet1Error('Expected database entry not found.')
54g_databases = {}
57def get_database(path):
58 path = os.path.abspath(path)
59 if path not in g_databases:
60 g_databases[path] = Database(path)
62 return g_databases[path]
65def close_database(database):
66 path = os.path.abspath(database._database_path)
67 database._conn.close()
68 if path in g_databases:
69 del g_databases[path]
72class Transaction(object):
73 def __init__(
74 self, conn,
75 label='',
76 mode='immediate',
77 retry_interval=0.1,
78 callback=None):
80 self.cursor = conn.cursor()
81 assert mode in ('deferred', 'immediate', 'exclusive')
82 self.mode = mode
83 self.depth = 0
84 self.rollback_wanted = False
85 self.retry_interval = retry_interval
86 self.callback = callback
87 self.label = label
88 self.started = False
90 def begin(self):
91 if self.depth == 0:
92 tries = 0
93 while True:
94 try:
95 tries += 1
96 self.cursor.execute('BEGIN %s' % self.mode.upper())
97 self.started = True
98 logger.debug(
99 'Transaction started: %-30s (pid: %s, mode: %s)'
100 % (self.label, os.getpid(), self.mode))
102 self.total_changes_begin \
103 = self.cursor.connection.total_changes
104 break
106 except sqlite3.OperationalError as e:
107 if not str(e) == 'database is locked':
108 raise
110 logger.info(
111 'Database is locked retrying in %s s: %s '
112 '(pid: %s, tries: %i)' % (
113 self.retry_interval, self.label,
114 os.getpid(), tries))
116 time.sleep(self.retry_interval)
118 self.depth += 1
120 def commit(self):
121 if not self.started:
122 raise Exception(
123 'Trying to commit without having started a transaction.')
125 self.depth -= 1
126 if self.depth == 0:
127 if not self.rollback_wanted:
128 self.cursor.execute('COMMIT')
129 self.started = False
130 if self.total_changes_begin is not None:
131 total_changes = self.cursor.connection.total_changes \
132 - self.total_changes_begin
133 else:
134 total_changes = None
136 if self.callback is not None and total_changes:
137 self.callback('modified', total_changes)
139 logger.debug(
140 'Transaction completed: %-30s '
141 '(pid: %s, changes: %i)' % (
142 self.label, os.getpid(), total_changes or 0))
144 else:
145 self.cursor.execute('ROLLBACK')
146 self.started = False
147 logger.warning('Deferred rollback executed.')
148 logger.debug(
149 'Transaction failed: %-30s (pid: %s)' % (
150 self.label, os.getpid()))
151 self.rollback_wanted = False
153 def rollback(self):
154 if not self.started:
155 raise Exception(
156 'Trying to rollback without having started a transaction.')
158 self.depth -= 1
159 if self.depth == 0:
160 self.cursor.execute('ROLLBACK')
161 self.started = False
163 logger.debug(
164 'Transaction failed: %-30s (pid: %s)' % (
165 self.label, os.getpid()))
167 self.rollback_wanted = False
168 else:
169 logger.warning('Deferred rollback scheduled.')
170 self.rollback_wanted = True
172 def close(self):
173 self.cursor.close()
175 def __enter__(self):
176 self.begin()
177 return self.cursor
179 def __exit__(self, exc_type, exc_value, traceback):
180 if exc_type is None:
181 self.commit()
182 else:
183 self.rollback()
185 if self.depth == 0:
186 self.close()
187 self.callback = None
190class Database(object):
191 '''
192 Shared meta-information database used by Squirrel.
193 '''
195 def __init__(self, database_path=':memory:', log_statements=False):
196 self._database_path = database_path
197 if database_path != ':memory:':
198 util.ensuredirs(database_path)
200 try:
201 logger.debug('Opening connection to database: %s' % database_path)
202 self._conn = sqlite3.connect(
203 database_path,
204 isolation_level=None,
205 check_same_thread=False if sqlite3.threadsafety else True)
206 except sqlite3.OperationalError:
207 raise error.SquirrelError(
208 'Cannot connect to database: %s' % database_path)
210 self._conn.text_factory = str
211 self._tables = {}
213 if log_statements:
214 self._conn.set_trace_callback(self._log_statement)
216 self._listeners = []
217 self._initialize_db()
218 self._basepath = None
220 self.version = None
222 def set_basepath(self, basepath):
223 if basepath is not None:
224 self._basepath = os.path.abspath(basepath)
225 else:
226 self._basepath = None
228 def relpath(self, path):
229 if self._basepath is not None and path.startswith(
230 self._basepath + os.path.sep):
231 return path[len(self._basepath) + 1:]
232 else:
233 return path
235 def abspath(self, path):
236 if self._basepath is not None and not path.startswith('virtual:') \
237 and not path.startswith('client:') \
238 and not os.path.isabs(path):
239 return os.path.join(self._basepath, path)
240 else:
241 return path
243 def _log_statement(self, statement):
244 logger.debug(statement)
246 def get_connection(self):
247 return self._conn
249 def transaction(self, label='', mode='immediate'):
250 return Transaction(
251 self._conn,
252 label=label,
253 mode=mode,
254 callback=self._notify_listeners)
256 def add_listener(self, listener):
257 if isinstance(listener, types.MethodType):
258 listener_ref = weakref.WeakMethod(listener)
259 else:
260 listener_ref = weakref.ref(listener)
262 self._listeners.append(listener_ref)
263 return listener_ref
265 def remove_listener(self, listener_ref):
266 self._listeners.remove(listener_ref)
268 def _notify_listeners(self, event, *args):
269 dead = []
270 for listener_ref in self._listeners:
271 listener = listener_ref()
272 if listener is not None:
273 listener(event, *args)
274 else:
275 dead.append(listener_ref)
277 for listener_ref in dead:
278 self.remove_listener(listener_ref)
280 def _register_table(self, s):
281 m = re.search(r'(\S+)\s*\(([^)]+)\)', s)
282 table_name = m.group(1)
283 dtypes = m.group(2)
284 table_header = []
285 for dele in dtypes.split(','):
286 table_header.append(dele.split()[:2])
288 self._tables[table_name] = table_header
290 return s
292 def _initialize_db(self):
293 with self.transaction('initialize') as cursor:
294 cursor.execute(
295 '''PRAGMA recursive_triggers = true''')
297 cursor.execute(
298 '''PRAGMA busy_timeout = 30000''')
300 if 2 == len(list(
301 cursor.execute(
302 '''
303 SELECT name FROM sqlite_master
304 WHERE type = 'table' AND name IN (
305 'files',
306 'persistent')
307 '''))):
309 try:
310 self.version = versiontuple(execute_get1(
311 cursor,
312 '''
313 SELECT value FROM settings
314 WHERE key == "version"
315 ''')[0])
316 except sqlite3.OperationalError:
317 raise error.SquirrelError(
318 'Squirrel database in pre-release format found: %s\n'
319 'Please remove the database file and reindex.'
320 % self._database_path)
322 if self.version >= (1, 1, 0):
323 raise error.SquirrelError(
324 'Squirrel database "%s" is of version %i.%i.%i which '
325 'is not supported by this version of Pyrocko. Please '
326 'upgrade the Pyrocko library.'
327 % ((self._database_path, ) + self.version))
329 return
331 cursor.execute(self._register_table(
332 '''
333 CREATE TABLE IF NOT EXISTS settings (
334 key text PRIMARY KEY,
335 value text)
336 '''))
338 cursor.execute(
339 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")')
341 self.version = execute_get1(
342 cursor,
343 'SELECT value FROM settings WHERE key == "version"')
345 cursor.execute(self._register_table(
346 '''
347 CREATE TABLE IF NOT EXISTS files (
348 file_id integer PRIMARY KEY,
349 path text,
350 format text,
351 mtime float,
352 size integer)
353 '''))
355 cursor.execute(
356 '''
357 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path
358 ON files (path)
359 ''')
361 cursor.execute(self._register_table(
362 '''
363 CREATE TABLE IF NOT EXISTS nuts (
364 nut_id integer PRIMARY KEY AUTOINCREMENT,
365 file_id integer,
366 file_segment integer,
367 file_element integer,
368 kind_id integer,
369 kind_codes_id integer,
370 tmin_seconds integer,
371 tmin_offset integer,
372 tmax_seconds integer,
373 tmax_offset integer,
374 kscale integer)
375 '''))
377 cursor.execute(
378 '''
379 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element
380 ON nuts (file_id, file_segment, file_element)
381 ''')
383 cursor.execute(self._register_table(
384 '''
385 CREATE TABLE IF NOT EXISTS kind_codes (
386 kind_codes_id integer PRIMARY KEY,
387 kind_id integer,
388 codes text,
389 deltat float)
390 '''))
392 cursor.execute(
393 '''
394 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes
395 ON kind_codes (kind_id, codes, deltat)
396 ''')
398 cursor.execute(self._register_table(
399 '''
400 CREATE TABLE IF NOT EXISTS kind_codes_count (
401 kind_codes_id integer PRIMARY KEY,
402 count integer)
403 '''))
405 cursor.execute(
406 '''
407 CREATE INDEX IF NOT EXISTS index_nuts_file_id
408 ON nuts (file_id)
409 ''')
411 cursor.execute(
412 '''
413 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file
414 BEFORE DELETE ON files FOR EACH ROW
415 BEGIN
416 DELETE FROM nuts where file_id == old.file_id;
417 END
418 ''')
420 # trigger only on size to make silent update of mtime possible
421 cursor.execute(
422 '''
423 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file
424 BEFORE UPDATE OF size ON files FOR EACH ROW
425 BEGIN
426 DELETE FROM nuts where file_id == old.file_id;
427 END
428 ''')
430 cursor.execute(
431 '''
432 CREATE TRIGGER IF NOT EXISTS increment_kind_codes
433 BEFORE INSERT ON nuts FOR EACH ROW
434 BEGIN
435 INSERT OR IGNORE INTO kind_codes_count
436 VALUES (new.kind_codes_id, 0);
437 UPDATE kind_codes_count
438 SET count = count + 1
439 WHERE new.kind_codes_id == kind_codes_id;
440 END
441 ''')
443 cursor.execute(
444 '''
445 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes
446 BEFORE DELETE ON nuts FOR EACH ROW
447 BEGIN
448 UPDATE kind_codes_count
449 SET count = count - 1
450 WHERE old.kind_codes_id == kind_codes_id;
451 END
452 ''')
454 cursor.execute(self._register_table(
455 '''
456 CREATE TABLE IF NOT EXISTS persistent (
457 name text UNIQUE)
458 '''))
460 def dig(self, nuts, transaction=None):
461 '''
462 Store or update content meta-information.
464 Given ``nuts`` are assumed to represent an up-to-date and complete
465 inventory of a set of files. Any old information about these files is
466 first pruned from the database (via database triggers). If such content
467 is part of a live selection, it is also removed there. Then the new
468 content meta-information is inserted into the main database. The
469 content is not automatically inserted into the live selections again.
470 It is in the responsibility of the selection object to perform this
471 step.
472 '''
474 nuts = list(nuts)
476 if not nuts:
477 return
479 files = set()
480 kind_codes = set()
481 for nut in nuts:
482 files.add((
483 self.relpath(nut.file_path),
484 nut.file_format,
485 nut.file_mtime,
486 nut.file_size))
487 kind_codes.add(
488 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0))
490 with (transaction or self.transaction('dig')) as c:
492 c.executemany(
493 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files)
495 c.executemany(
496 '''UPDATE files SET
497 format = ?, mtime = ?, size = ?
498 WHERE path == ?
499 ''',
500 ((x[1], x[2], x[3], x[0]) for x in files))
502 c.executemany(
503 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)',
504 kind_codes)
506 c.executemany(
507 '''
508 INSERT INTO nuts VALUES
509 (NULL, (
510 SELECT file_id FROM files
511 WHERE path == ?
512 ),?,?,?,
513 (
514 SELECT kind_codes_id FROM kind_codes
515 WHERE kind_id == ? AND codes == ? AND deltat == ?
516 ), ?,?,?,?,?)
517 ''',
518 ((self.relpath(nut.file_path),
519 nut.file_segment, nut.file_element,
520 nut.kind_id,
521 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0,
522 nut.tmin_seconds, nut.tmin_offset,
523 nut.tmax_seconds, nut.tmax_offset,
524 nut.kscale) for nut in nuts))
526 def undig(self, path):
528 path = self.relpath(abspath(path))
530 sql = '''
531 SELECT
532 files.path,
533 files.format,
534 files.mtime,
535 files.size,
536 nuts.file_segment,
537 nuts.file_element,
538 kind_codes.kind_id,
539 kind_codes.codes,
540 nuts.tmin_seconds,
541 nuts.tmin_offset,
542 nuts.tmax_seconds,
543 nuts.tmax_offset,
544 kind_codes.deltat
545 FROM files
546 INNER JOIN nuts ON files.file_id = nuts.file_id
547 INNER JOIN kind_codes
548 ON nuts.kind_codes_id == kind_codes.kind_codes_id
549 WHERE path == ?
550 '''
552 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:])
553 for row in self._conn.execute(sql, (path,))]
555 def undig_all(self):
556 sql = '''
557 SELECT
558 files.path,
559 files.format,
560 files.mtime,
561 files.size,
562 nuts.file_segment,
563 nuts.file_element,
564 kind_codes.kind_id,
565 kind_codes.codes,
566 nuts.tmin_seconds,
567 nuts.tmin_offset,
568 nuts.tmax_seconds,
569 nuts.tmax_offset,
570 kind_codes.deltat
571 FROM files
572 INNER JOIN nuts ON files.file_id == nuts.file_id
573 INNER JOIN kind_codes
574 ON nuts.kind_codes_id == kind_codes.kind_codes_id
575 '''
577 nuts = []
578 path = None
579 for values in self._conn.execute(sql):
580 if path is not None and values[0] != path:
581 yield path, nuts
582 nuts = []
584 path = self.abspath(values[0])
586 if values[1] is not None:
587 nuts.append(Nut(values_nocheck=(path,) + values[1:]))
589 if path is not None:
590 yield path, nuts
592 def undig_few(self, paths, format='detect'):
593 for path in paths:
594 nuts = self.undig(path)
595 if nuts:
596 yield (nuts[0].file_format, path), nuts
597 else:
598 yield (format, path), []
600 def undig_many(self, paths, show_progress=True):
601 selection = self.new_selection(paths, show_progress=show_progress)
603 for (_, path), nuts in selection.undig_grouped():
604 yield path, nuts
606 del selection
608 def new_selection(self, paths=None, format='detect', show_progress=True):
609 from .selection import Selection
610 selection = Selection(self)
611 if paths:
612 selection.add(paths, format=format, show_progress=show_progress)
613 return selection
615 def undig_content(self, nut):
616 return None
618 def remove(self, path):
619 '''
620 Prune content meta-information about a given file.
622 All content pieces belonging to file ``path`` are removed from the
623 main database and any attached live selections (via database triggers).
624 '''
626 path = self.relpath(abspath(path))
628 with self.transaction('remove file') as cursor:
629 cursor.execute(
630 'DELETE FROM files WHERE path = ?', (path,))
632 def remove_glob(self, pattern):
633 '''
634 Prune content meta-information about files matching given pattern.
636 All content pieces belonging to files who's pathes match the given
637 ``pattern`` are removed from the main database and any attached live
638 selections (via database triggers).
639 '''
641 with self.transaction('remove file glob') as cursor:
642 return cursor.execute(
643 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount
645 def _remove_volatile(self):
646 '''
647 Prune leftover volatile content from database.
649 If the cleanup handler of an attached selection is not called, e.g. due
650 to a crash or terminated process, volatile content will not be removed
651 properly. This method will delete such leftover entries.
653 This is a mainenance operatation which should only be called when no
654 apps are using the database because it would remove volatile content
655 currently used by the apps.
656 '''
658 with self.transaction('remove volatile') as cursor:
659 return cursor.execute(
660 '''
661 DELETE FROM files
662 WHERE path LIKE 'virtual:volatile:%'
663 ''').rowcount
665 def reset(self, path, transaction=None):
666 '''
667 Prune information associated with a given file, but keep the file path.
669 This method is called when reading a file failed. File attributes,
670 format, size and modification time are set to NULL. File content
671 meta-information is removed from the database and any attached live
672 selections (via database triggers).
673 '''
675 path = self.relpath(abspath(path))
677 with (transaction or self.transaction('reset file')) as cursor:
678 cursor.execute(
679 '''
680 UPDATE files SET
681 format = NULL,
682 mtime = NULL,
683 size = NULL
684 WHERE path = ?
685 ''', (path,))
687 def silent_touch(self, path):
688 '''
689 Update modification time of file without initiating reindexing.
691 Useful to prolong validity period of data with expiration date.
692 '''
694 apath = abspath(path)
695 path = self.relpath(apath)
697 with self.transaction('silent touch') as cursor:
699 sql = 'SELECT format, size FROM files WHERE path = ?'
700 fmt, size = execute_get1(cursor, sql, (path,))
702 mod = io.get_backend(fmt)
703 mod.touch(apath)
704 file_stats = mod.get_stats(apath)
706 if file_stats[1] != size:
707 raise FileLoadError(
708 'Silent update for file "%s" failed: size has changed.'
709 % apath)
711 sql = '''
712 UPDATE files
713 SET mtime = ?
714 WHERE path = ?
715 '''
716 cursor.execute(sql, (file_stats[0], path))
718 def _iter_codes_info(
719 self, kind=None, codes=None, kind_codes_count='kind_codes_count'):
721 args = []
722 sel = ''
723 if kind is not None:
724 kind_id = to_kind_id(kind)
726 sel = 'AND kind_codes.kind_id == ?'
727 args.append(to_kind_id(kind))
729 if codes is not None:
730 assert kind is not None # TODO supp by recursing possible kinds
731 kind_id = to_kind_id(kind)
732 pats = codes_patterns_for_kind(kind_id, codes)
734 if pats:
735 # could optimize this by using IN for non-patterns
736 sel += ' AND ( %s ) ' % ' OR '.join(
737 ('kind_codes.codes GLOB ?',) * len(pats))
739 args.extend(pat.safe_str for pat in pats)
741 sql = ('''
742 SELECT
743 kind_codes.kind_id,
744 kind_codes.codes,
745 kind_codes.deltat,
746 kind_codes.kind_codes_id,
747 %(kind_codes_count)s.count
748 FROM %(kind_codes_count)s
749 INNER JOIN kind_codes
750 ON %(kind_codes_count)s.kind_codes_id
751 == kind_codes.kind_codes_id
752 WHERE %(kind_codes_count)s.count > 0
753 ''' + sel + '''
754 ''') % {'kind_codes_count': kind_codes_count}
756 for kind_id, scodes, deltat, kcid, count in self._conn.execute(
757 sql, args):
759 yield (
760 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count)
762 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'):
763 args = []
764 sel = ''
765 if kind is not None:
766 assert isinstance(kind, str)
767 sel = 'AND kind_codes.kind_id == ?'
768 args.append(to_kind_id(kind))
770 sql = ('''
771 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s
772 INNER JOIN kind_codes
773 ON %(kind_codes_count)s.kind_codes_id
774 == kind_codes.kind_codes_id
775 WHERE %(kind_codes_count)s.count > 0
776 ''' + sel + '''
777 ORDER BY kind_codes.deltat
778 ''') % {'kind_codes_count': kind_codes_count}
780 for row in self._conn.execute(sql, args):
781 yield row[0]
783 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'):
784 args = []
785 sel = ''
786 if kind is not None:
787 assert isinstance(kind, str)
788 sel = 'AND kind_codes.kind_id == ?'
789 args.append(to_kind_id(kind))
791 sql = ('''
792 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes
793 FROM %(kind_codes_count)s
794 INNER JOIN kind_codes
795 ON %(kind_codes_count)s.kind_codes_id
796 == kind_codes.kind_codes_id
797 WHERE %(kind_codes_count)s.count > 0
798 ''' + sel + '''
799 ORDER BY kind_codes.codes
801 ''') % dict(kind_codes_count=kind_codes_count)
803 for row in self._conn.execute(sql, args):
804 yield to_codes_simple(*row)
806 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'):
807 args = []
808 sel = ''
809 if codes is not None:
810 sel = 'AND kind_codes.codes == ?'
811 args.append(codes.safe_str)
813 sql = ('''
814 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s
815 INNER JOIN kind_codes
816 ON %(kind_codes_count)s.kind_codes_id
817 == kind_codes.kind_codes_id
818 WHERE %(kind_codes_count)s.count > 0
819 ''' + sel + '''
820 ORDER BY kind_codes.kind_id
821 ''') % {'kind_codes_count': kind_codes_count}
823 for row in self._conn.execute(sql, args):
824 yield to_kind(row[0])
826 def iter_paths(self):
827 for row in self._conn.execute('''SELECT path FROM files'''):
828 yield self.abspath(row[0])
830 def iter_nnuts_by_file(self):
831 sql = '''
832 SELECT
833 path,
834 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id)
835 FROM files
836 '''
837 for row in self._conn.execute(sql):
838 yield (self.abspath(row[0]),) + row[1:]
840 def iter_kinds(self, codes=None):
841 return self._iter_kinds(codes=codes)
843 def iter_codes(self, kind=None):
844 return self._iter_codes(kind=kind)
846 def get_paths(self):
847 return list(self.iter_paths())
849 def get_kinds(self, codes=None):
850 return list(self.iter_kinds(codes=codes))
852 def get_codes(self, kind=None):
853 return list(self.iter_codes(kind=kind))
855 def get_counts(self, kind=None):
856 d = {}
857 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
858 if kind_id not in d:
859 v = d[kind_id] = {}
860 else:
861 v = d[kind_id]
863 if codes not in v:
864 v[codes] = 0
866 v[codes] += count
868 if kind is not None:
869 return d[to_kind_id(kind)]
870 else:
871 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
873 def get_nfiles(self):
874 sql = '''SELECT COUNT(*) FROM files'''
875 for row in self._conn.execute(sql):
876 return row[0]
878 def get_nnuts(self):
879 sql = '''SELECT COUNT(*) FROM nuts'''
880 for row in self._conn.execute(sql):
881 return row[0]
883 def get_nnuts_by_file(self):
884 return list(self.iter_nnuts_by_file())
886 def get_total_size(self):
887 sql = '''
888 SELECT SUM(files.size) FROM files
889 '''
891 for row in self._conn.execute(sql):
892 return row[0] or 0
894 def get_persistent_names(self):
895 sql = '''
896 SELECT name FROM persistent
897 '''
898 return [row[0] for row in self._conn.execute(sql)]
900 def get_stats(self):
901 return DatabaseStats(
902 nfiles=self.get_nfiles(),
903 nnuts=self.get_nnuts(),
904 kinds=self.get_kinds(),
905 codes=self.get_codes(),
906 counts=self.get_counts(),
907 total_size=self.get_total_size(),
908 persistent=self.get_persistent_names())
910 def __str__(self):
911 return str(self.get_stats())
913 def print_tables(self, stream=None):
914 for table in [
915 'persistent',
916 'files',
917 'nuts',
918 'kind_codes',
919 'kind_codes_count']:
921 self.print_table(table, stream=stream)
923 def print_table(self, name, stream=None):
925 if stream is None:
926 stream = sys.stdout
928 class hstr(str):
929 def __repr__(self):
930 return self
932 w = stream.write
933 w('\n')
934 w('\n')
935 w(name)
936 w('\n')
937 sql = 'SELECT * FROM %s' % name
938 tab = []
939 if name in self._tables:
940 headers = self._tables[name]
941 tab.append([None for _ in headers])
942 tab.append([hstr(x[0]) for x in headers])
943 tab.append([hstr(x[1]) for x in headers])
944 tab.append([None for _ in headers])
946 for row in self._conn.execute(sql):
947 tab.append([x for x in row])
949 widths = [
950 max((len(repr(x)) if x is not None else 0) for x in col)
951 for col in zip(*tab)]
953 for row in tab:
954 w(' '.join(
955 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-'))
956 for (x, wid) in zip(row, widths)))
958 w('\n')
960 w('\n')
963class DatabaseStats(Object):
964 '''
965 Container to hold statistics about contents cached in meta-information db.
966 '''
968 nfiles = Int.T(
969 help='Number of files in database.')
970 nnuts = Int.T(
971 help='Number of index nuts in database.')
972 codes = List.T(
973 Tuple.T(content_t=String.T()),
974 help='Available code sequences in database, e.g. '
975 '(agency, network, station, location) for stations nuts.')
976 kinds = List.T(
977 String.T(),
978 help='Available content types in database.')
979 total_size = Int.T(
980 help='Aggregated file size [bytes] of files referenced in database.')
981 counts = Dict.T(
982 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
983 help='Breakdown of how many nuts of any content type and code '
984 'sequence are available in database, ``counts[kind][codes]``.')
985 persistent = List.T(
986 String.T(),
987 help='Names of persistent selections stored in database.')
989 def __str__(self):
990 kind_counts = dict(
991 (kind, sum(self.counts[kind].values())) for kind in self.kinds)
993 codes = [c.safe_str for c in self.codes]
995 if len(codes) > 20:
996 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \
997 + '\n [%i more]\n' % (len(codes) - 20) \
998 + util.ewrap(codes[-10:], indent=' ')
999 else:
1000 scodes = '\n' + util.ewrap(codes, indent=' ') \
1001 if codes else '<none>'
1003 s = '''
1004Available codes: %s
1005Number of files: %i
1006Total size of known files: %s
1007Number of index nuts: %i
1008Available content kinds: %s
1009Persistent selections: %s''' % (
1010 scodes,
1011 self.nfiles,
1012 util.human_bytesize(self.total_size),
1013 self.nnuts,
1014 ', '.join('%s: %i' % (
1015 kind, kind_counts[kind]) for kind in sorted(self.kinds)),
1016 ', '.join(self.persistent))
1018 return s
1021__all__ = [
1022 'Database',
1023 'DatabaseStats',
1024]