1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import os
8import logging
9import sqlite3
10import re
11import time
13from pyrocko.io.io_common import FileLoadError
14from pyrocko import util
15from pyrocko.guts import Object, Int, List, Dict, Tuple, String
16from . import error, io
17from .model import Nut, to_kind_id, to_kind, to_codes_simple, \
18 codes_patterns_for_kind
19from .error import SquirrelError
21logger = logging.getLogger('psq.database')
23guts_prefix = 'squirrel'
26def abspath(path):
27 if not path.startswith('virtual:') and not path.startswith('client:'):
28 return os.path.abspath(path)
29 else:
30 return path
33def versiontuple(s):
34 fill = [0, 0, 0]
35 vals = [int(x) for x in s.split('.')]
36 fill[:len(vals)] = vals
37 return tuple(fill)
40class ExecuteGet1Error(SquirrelError):
41 pass
44def execute_get1(connection, sql, args=()):
45 rows = list(connection.execute(sql, args))
46 if len(rows) == 1:
47 return rows[0]
48 else:
49 raise ExecuteGet1Error('Expected database entry not found.')
52g_databases = {}
55def get_database(path):
56 path = os.path.abspath(path)
57 if path not in g_databases:
58 g_databases[path] = Database(path)
60 return g_databases[path]
63def close_database(database):
64 path = os.path.abspath(database._database_path)
65 database._conn.close()
66 if path in g_databases:
67 del g_databases[path]
70class Transaction(object):
71 def __init__(
72 self, conn,
73 label='',
74 mode='immediate',
75 retry_interval=0.1,
76 callback=None):
78 self.cursor = conn.cursor()
79 assert mode in ('deferred', 'immediate', 'exclusive')
80 self.mode = mode
81 self.depth = 0
82 self.rollback_wanted = False
83 self.retry_interval = retry_interval
84 self.callback = callback
85 self.label = label
86 self.started = False
88 def begin(self):
89 if self.depth == 0:
90 tries = 0
91 while True:
92 try:
93 tries += 1
94 self.cursor.execute('BEGIN %s' % self.mode.upper())
95 self.started = True
96 logger.debug(
97 'Transaction started: %-30s (pid: %s, mode: %s)'
98 % (self.label, os.getpid(), self.mode))
100 self.total_changes_begin \
101 = self.cursor.connection.total_changes
102 break
104 except sqlite3.OperationalError as e:
105 if not str(e) == 'database is locked':
106 raise
108 logger.info(
109 'Database is locked retrying in %s s: %s '
110 '(pid: %s, tries: %i)' % (
111 self.retry_interval, self.label,
112 os.getpid(), tries))
114 time.sleep(self.retry_interval)
116 self.depth += 1
118 def commit(self):
119 if not self.started:
120 raise Exception(
121 'Trying to commit without having started a transaction.')
123 self.depth -= 1
124 if self.depth == 0:
125 if not self.rollback_wanted:
126 self.cursor.execute('COMMIT')
127 self.started = False
128 if self.total_changes_begin is not None:
129 total_changes = self.cursor.connection.total_changes \
130 - self.total_changes_begin
131 else:
132 total_changes = None
134 if self.callback is not None and total_changes:
135 self.callback('modified', total_changes)
137 logger.debug(
138 'Transaction completed: %-30s '
139 '(pid: %s, changes: %i)' % (
140 self.label, os.getpid(), total_changes or 0))
142 else:
143 self.cursor.execute('ROLLBACK')
144 self.started = False
145 logger.warning('Deferred rollback executed.')
146 logger.debug(
147 'Transaction failed: %-30s (pid: %s)' % (
148 self.label, os.getpid()))
149 self.rollback_wanted = False
151 def rollback(self):
152 if not self.started:
153 raise Exception(
154 'Trying to rollback without having started a transaction.')
156 self.depth -= 1
157 if self.depth == 0:
158 self.cursor.execute('ROLLBACK')
159 self.started = False
161 logger.debug(
162 'Transaction failed: %-30s (pid: %s)' % (
163 self.label, os.getpid()))
165 self.rollback_wanted = False
166 else:
167 logger.warning('Deferred rollback scheduled.')
168 self.rollback_wanted = True
170 def close(self):
171 self.cursor.close()
173 def __enter__(self):
174 self.begin()
175 return self.cursor
177 def __exit__(self, exc_type, exc_value, traceback):
178 if exc_type is None:
179 self.commit()
180 else:
181 self.rollback()
183 if self.depth == 0:
184 self.close()
185 self.callback = None
188class Database(object):
189 '''
190 Shared meta-information database used by Squirrel.
191 '''
193 def __init__(self, database_path=':memory:', log_statements=False):
194 self._database_path = database_path
195 if database_path != ':memory:':
196 util.ensuredirs(database_path)
198 try:
199 logger.debug('Opening connection to database: %s' % database_path)
200 self._conn = sqlite3.connect(database_path, isolation_level=None)
201 except sqlite3.OperationalError:
202 raise error.SquirrelError(
203 'Cannot connect to database: %s' % database_path)
205 self._conn.text_factory = str
206 self._tables = {}
208 if log_statements:
209 self._conn.set_trace_callback(self._log_statement)
211 self._listeners = []
212 self._initialize_db()
213 self._basepath = None
215 self.version = None
217 def set_basepath(self, basepath):
218 if basepath is not None:
219 self._basepath = os.path.abspath(basepath)
220 else:
221 self._basepath = None
223 def relpath(self, path):
224 if self._basepath is not None and path.startswith(
225 self._basepath + os.path.sep):
226 return path[len(self._basepath) + 1:]
227 else:
228 return path
230 def abspath(self, path):
231 if self._basepath is not None and not path.startswith('virtual:') \
232 and not path.startswith('client:') \
233 and not os.path.isabs(path):
234 return os.path.join(self._basepath, path)
235 else:
236 return path
238 def _log_statement(self, statement):
239 logger.debug(statement)
241 def get_connection(self):
242 return self._conn
244 def transaction(self, label='', mode='immediate'):
245 return Transaction(
246 self._conn,
247 label=label,
248 mode=mode,
249 callback=self._notify_listeners)
251 def add_listener(self, listener):
252 listener_ref = util.smart_weakref(listener)
253 self._listeners.append(listener_ref)
254 return listener_ref
256 def remove_listener(self, listener_ref):
257 self._listeners.remove(listener_ref)
259 def _notify_listeners(self, event, *args):
260 dead = []
261 for listener_ref in self._listeners:
262 listener = listener_ref()
263 if listener is not None:
264 listener(event, *args)
265 else:
266 dead.append(listener_ref)
268 for listener_ref in dead:
269 self.remove_listener(listener_ref)
271 def _register_table(self, s):
272 m = re.search(r'(\S+)\s*\(([^)]+)\)', s)
273 table_name = m.group(1)
274 dtypes = m.group(2)
275 table_header = []
276 for dele in dtypes.split(','):
277 table_header.append(dele.split()[:2])
279 self._tables[table_name] = table_header
281 return s
283 def _initialize_db(self):
284 with self.transaction('initialize') as cursor:
285 cursor.execute(
286 '''PRAGMA recursive_triggers = true''')
288 cursor.execute(
289 '''PRAGMA busy_timeout = 30000''')
291 if 2 == len(list(
292 cursor.execute(
293 '''
294 SELECT name FROM sqlite_master
295 WHERE type = 'table' AND name IN (
296 'files',
297 'persistent')
298 '''))):
300 try:
301 self.version = versiontuple(execute_get1(
302 cursor,
303 '''
304 SELECT value FROM settings
305 WHERE key == "version"
306 ''')[0])
307 except sqlite3.OperationalError:
308 raise error.SquirrelError(
309 'Squirrel database in pre-release format found: %s\n'
310 'Please remove the database file and reindex.'
311 % self._database_path)
313 if self.version >= (1, 1, 0):
314 raise error.SquirrelError(
315 'Squirrel database "%s" is of version %i.%i.%i which '
316 'is not supported by this version of Pyrocko. Please '
317 'upgrade the Pyrocko library.'
318 % ((self._database_path, ) + self.version))
320 return
322 cursor.execute(self._register_table(
323 '''
324 CREATE TABLE IF NOT EXISTS settings (
325 key text PRIMARY KEY,
326 value text)
327 '''))
329 cursor.execute(
330 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")')
332 self.version = execute_get1(
333 cursor,
334 'SELECT value FROM settings WHERE key == "version"')
336 cursor.execute(self._register_table(
337 '''
338 CREATE TABLE IF NOT EXISTS files (
339 file_id integer PRIMARY KEY,
340 path text,
341 format text,
342 mtime float,
343 size integer)
344 '''))
346 cursor.execute(
347 '''
348 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path
349 ON files (path)
350 ''')
352 cursor.execute(self._register_table(
353 '''
354 CREATE TABLE IF NOT EXISTS nuts (
355 nut_id integer PRIMARY KEY AUTOINCREMENT,
356 file_id integer,
357 file_segment integer,
358 file_element integer,
359 kind_id integer,
360 kind_codes_id integer,
361 tmin_seconds integer,
362 tmin_offset integer,
363 tmax_seconds integer,
364 tmax_offset integer,
365 kscale integer)
366 '''))
368 cursor.execute(
369 '''
370 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element
371 ON nuts (file_id, file_segment, file_element)
372 ''')
374 cursor.execute(self._register_table(
375 '''
376 CREATE TABLE IF NOT EXISTS kind_codes (
377 kind_codes_id integer PRIMARY KEY,
378 kind_id integer,
379 codes text,
380 deltat float)
381 '''))
383 cursor.execute(
384 '''
385 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes
386 ON kind_codes (kind_id, codes, deltat)
387 ''')
389 cursor.execute(self._register_table(
390 '''
391 CREATE TABLE IF NOT EXISTS kind_codes_count (
392 kind_codes_id integer PRIMARY KEY,
393 count integer)
394 '''))
396 cursor.execute(
397 '''
398 CREATE INDEX IF NOT EXISTS index_nuts_file_id
399 ON nuts (file_id)
400 ''')
402 cursor.execute(
403 '''
404 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file
405 BEFORE DELETE ON files FOR EACH ROW
406 BEGIN
407 DELETE FROM nuts where file_id == old.file_id;
408 END
409 ''')
411 # trigger only on size to make silent update of mtime possible
412 cursor.execute(
413 '''
414 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file
415 BEFORE UPDATE OF size ON files FOR EACH ROW
416 BEGIN
417 DELETE FROM nuts where file_id == old.file_id;
418 END
419 ''')
421 cursor.execute(
422 '''
423 CREATE TRIGGER IF NOT EXISTS increment_kind_codes
424 BEFORE INSERT ON nuts FOR EACH ROW
425 BEGIN
426 INSERT OR IGNORE INTO kind_codes_count
427 VALUES (new.kind_codes_id, 0);
428 UPDATE kind_codes_count
429 SET count = count + 1
430 WHERE new.kind_codes_id == kind_codes_id;
431 END
432 ''')
434 cursor.execute(
435 '''
436 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes
437 BEFORE DELETE ON nuts FOR EACH ROW
438 BEGIN
439 UPDATE kind_codes_count
440 SET count = count - 1
441 WHERE old.kind_codes_id == kind_codes_id;
442 END
443 ''')
445 cursor.execute(self._register_table(
446 '''
447 CREATE TABLE IF NOT EXISTS persistent (
448 name text UNIQUE)
449 '''))
451 def dig(self, nuts, transaction=None):
452 '''
453 Store or update content meta-information.
455 Given ``nuts`` are assumed to represent an up-to-date and complete
456 inventory of a set of files. Any old information about these files is
457 first pruned from the database (via database triggers). If such content
458 is part of a live selection, it is also removed there. Then the new
459 content meta-information is inserted into the main database. The
460 content is not automatically inserted into the live selections again.
461 It is in the responsibility of the selection object to perform this
462 step.
463 '''
465 nuts = list(nuts)
467 if not nuts:
468 return
470 files = set()
471 kind_codes = set()
472 for nut in nuts:
473 files.add((
474 self.relpath(nut.file_path),
475 nut.file_format,
476 nut.file_mtime,
477 nut.file_size))
478 kind_codes.add(
479 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0))
481 with (transaction or self.transaction('dig')) as c:
483 c.executemany(
484 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files)
486 c.executemany(
487 '''UPDATE files SET
488 format = ?, mtime = ?, size = ?
489 WHERE path == ?
490 ''',
491 ((x[1], x[2], x[3], x[0]) for x in files))
493 c.executemany(
494 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)',
495 kind_codes)
497 c.executemany(
498 '''
499 INSERT INTO nuts VALUES
500 (NULL, (
501 SELECT file_id FROM files
502 WHERE path == ?
503 ),?,?,?,
504 (
505 SELECT kind_codes_id FROM kind_codes
506 WHERE kind_id == ? AND codes == ? AND deltat == ?
507 ), ?,?,?,?,?)
508 ''',
509 ((self.relpath(nut.file_path),
510 nut.file_segment, nut.file_element,
511 nut.kind_id,
512 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0,
513 nut.tmin_seconds, nut.tmin_offset,
514 nut.tmax_seconds, nut.tmax_offset,
515 nut.kscale) for nut in nuts))
517 def undig(self, path):
519 path = self.relpath(abspath(path))
521 sql = '''
522 SELECT
523 files.path,
524 files.format,
525 files.mtime,
526 files.size,
527 nuts.file_segment,
528 nuts.file_element,
529 kind_codes.kind_id,
530 kind_codes.codes,
531 nuts.tmin_seconds,
532 nuts.tmin_offset,
533 nuts.tmax_seconds,
534 nuts.tmax_offset,
535 kind_codes.deltat
536 FROM files
537 INNER JOIN nuts ON files.file_id = nuts.file_id
538 INNER JOIN kind_codes
539 ON nuts.kind_codes_id == kind_codes.kind_codes_id
540 WHERE path == ?
541 '''
543 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:])
544 for row in self._conn.execute(sql, (path,))]
546 def undig_all(self):
547 sql = '''
548 SELECT
549 files.path,
550 files.format,
551 files.mtime,
552 files.size,
553 nuts.file_segment,
554 nuts.file_element,
555 kind_codes.kind_id,
556 kind_codes.codes,
557 nuts.tmin_seconds,
558 nuts.tmin_offset,
559 nuts.tmax_seconds,
560 nuts.tmax_offset,
561 kind_codes.deltat
562 FROM files
563 INNER JOIN nuts ON files.file_id == nuts.file_id
564 INNER JOIN kind_codes
565 ON nuts.kind_codes_id == kind_codes.kind_codes_id
566 '''
568 nuts = []
569 path = None
570 for values in self._conn.execute(sql):
571 if path is not None and values[0] != path:
572 yield path, nuts
573 nuts = []
575 path = self.abspath(values[0])
577 if values[1] is not None:
578 nuts.append(Nut(values_nocheck=(path,) + values[1:]))
580 if path is not None:
581 yield path, nuts
583 def undig_few(self, paths, format='detect'):
584 for path in paths:
585 nuts = self.undig(path)
586 if nuts:
587 yield (nuts[0].file_format, path), nuts
588 else:
589 yield (format, path), []
591 def undig_many(self, paths, show_progress=True):
592 selection = self.new_selection(paths, show_progress=show_progress)
594 for (_, path), nuts in selection.undig_grouped():
595 yield path, nuts
597 del selection
599 def new_selection(self, paths=None, format='detect', show_progress=True):
600 from .selection import Selection
601 selection = Selection(self)
602 if paths:
603 selection.add(paths, format=format, show_progress=show_progress)
604 return selection
606 def undig_content(self, nut):
607 return None
609 def remove(self, path):
610 '''
611 Prune content meta-information about a given file.
613 All content pieces belonging to file ``path`` are removed from the
614 main database and any attached live selections (via database triggers).
615 '''
617 path = self.relpath(abspath(path))
619 with self.transaction('remove file') as cursor:
620 cursor.execute(
621 'DELETE FROM files WHERE path = ?', (path,))
623 def remove_glob(self, pattern):
624 '''
625 Prune content meta-information about files matching given pattern.
627 All content pieces belonging to files who's pathes match the given
628 ``pattern`` are removed from the main database and any attached live
629 selections (via database triggers).
630 '''
632 with self.transaction('remove file glob') as cursor:
633 return cursor.execute(
634 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount
636 def _remove_volatile(self):
637 '''
638 Prune leftover volatile content from database.
640 If the cleanup handler of an attached selection is not called, e.g. due
641 to a crash or terminated process, volatile content will not be removed
642 properly. This method will delete such leftover entries.
644 This is a mainenance operatation which should only be called when no
645 apps are using the database because it would remove volatile content
646 currently used by the apps.
647 '''
649 with self.transaction('remove volatile') as cursor:
650 return cursor.execute(
651 '''
652 DELETE FROM files
653 WHERE path LIKE 'virtual:volatile:%'
654 ''').rowcount
656 def reset(self, path, transaction=None):
657 '''
658 Prune information associated with a given file, but keep the file path.
660 This method is called when reading a file failed. File attributes,
661 format, size and modification time are set to NULL. File content
662 meta-information is removed from the database and any attached live
663 selections (via database triggers).
664 '''
666 path = self.relpath(abspath(path))
668 with (transaction or self.transaction('reset file')) as cursor:
669 cursor.execute(
670 '''
671 UPDATE files SET
672 format = NULL,
673 mtime = NULL,
674 size = NULL
675 WHERE path = ?
676 ''', (path,))
678 def silent_touch(self, path):
679 '''
680 Update modification time of file without initiating reindexing.
682 Useful to prolong validity period of data with expiration date.
683 '''
685 apath = abspath(path)
686 path = self.relpath(apath)
688 with self.transaction('silent touch') as cursor:
690 sql = 'SELECT format, size FROM files WHERE path = ?'
691 fmt, size = execute_get1(cursor, sql, (path,))
693 mod = io.get_backend(fmt)
694 mod.touch(apath)
695 file_stats = mod.get_stats(apath)
697 if file_stats[1] != size:
698 raise FileLoadError(
699 'Silent update for file "%s" failed: size has changed.'
700 % apath)
702 sql = '''
703 UPDATE files
704 SET mtime = ?
705 WHERE path = ?
706 '''
707 cursor.execute(sql, (file_stats[0], path))
709 def _iter_codes_info(
710 self, kind=None, codes=None, kind_codes_count='kind_codes_count'):
712 args = []
713 sel = ''
714 if kind is not None:
715 kind_id = to_kind_id(kind)
717 sel = 'AND kind_codes.kind_id == ?'
718 args.append(to_kind_id(kind))
720 if codes is not None:
721 assert kind is not None # TODO supp by recursing possible kinds
722 kind_id = to_kind_id(kind)
723 pats = codes_patterns_for_kind(kind_id, codes)
725 if pats:
726 # could optimize this by using IN for non-patterns
727 sel += ' AND ( %s ) ' % ' OR '.join(
728 ('kind_codes.codes GLOB ?',) * len(pats))
730 args.extend(pat.safe_str for pat in pats)
732 sql = ('''
733 SELECT
734 kind_codes.kind_id,
735 kind_codes.codes,
736 kind_codes.deltat,
737 kind_codes.kind_codes_id,
738 %(kind_codes_count)s.count
739 FROM %(kind_codes_count)s
740 INNER JOIN kind_codes
741 ON %(kind_codes_count)s.kind_codes_id
742 == kind_codes.kind_codes_id
743 WHERE %(kind_codes_count)s.count > 0
744 ''' + sel + '''
745 ''') % {'kind_codes_count': kind_codes_count}
747 for kind_id, scodes, deltat, kcid, count in self._conn.execute(
748 sql, args):
750 yield (
751 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count)
753 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'):
754 args = []
755 sel = ''
756 if kind is not None:
757 assert isinstance(kind, str)
758 sel = 'AND kind_codes.kind_id == ?'
759 args.append(to_kind_id(kind))
761 sql = ('''
762 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s
763 INNER JOIN kind_codes
764 ON %(kind_codes_count)s.kind_codes_id
765 == kind_codes.kind_codes_id
766 WHERE %(kind_codes_count)s.count > 0
767 ''' + sel + '''
768 ORDER BY kind_codes.deltat
769 ''') % {'kind_codes_count': kind_codes_count}
771 for row in self._conn.execute(sql, args):
772 yield row[0]
774 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'):
775 args = []
776 sel = ''
777 if kind is not None:
778 assert isinstance(kind, str)
779 sel = 'AND kind_codes.kind_id == ?'
780 args.append(to_kind_id(kind))
782 sql = ('''
783 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes
784 FROM %(kind_codes_count)s
785 INNER JOIN kind_codes
786 ON %(kind_codes_count)s.kind_codes_id
787 == kind_codes.kind_codes_id
788 WHERE %(kind_codes_count)s.count > 0
789 ''' + sel + '''
790 ORDER BY kind_codes.codes
792 ''') % dict(kind_codes_count=kind_codes_count)
794 for row in self._conn.execute(sql, args):
795 yield to_codes_simple(*row)
797 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'):
798 args = []
799 sel = ''
800 if codes is not None:
801 sel = 'AND kind_codes.codes == ?'
802 args.append(codes.safe_str)
804 sql = ('''
805 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s
806 INNER JOIN kind_codes
807 ON %(kind_codes_count)s.kind_codes_id
808 == kind_codes.kind_codes_id
809 WHERE %(kind_codes_count)s.count > 0
810 ''' + sel + '''
811 ORDER BY kind_codes.kind_id
812 ''') % {'kind_codes_count': kind_codes_count}
814 for row in self._conn.execute(sql, args):
815 yield to_kind(row[0])
817 def iter_paths(self):
818 for row in self._conn.execute('''SELECT path FROM files'''):
819 yield self.abspath(row[0])
821 def iter_nnuts_by_file(self):
822 sql = '''
823 SELECT
824 path,
825 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id)
826 FROM files
827 '''
828 for row in self._conn.execute(sql):
829 yield (self.abspath(row[0]),) + row[1:]
831 def iter_kinds(self, codes=None):
832 return self._iter_kinds(codes=codes)
834 def iter_codes(self, kind=None):
835 return self._iter_codes(kind=kind)
837 def get_paths(self):
838 return list(self.iter_paths())
840 def get_kinds(self, codes=None):
841 return list(self.iter_kinds(codes=codes))
843 def get_codes(self, kind=None):
844 return list(self.iter_codes(kind=kind))
846 def get_counts(self, kind=None):
847 d = {}
848 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
849 if kind_id not in d:
850 v = d[kind_id] = {}
851 else:
852 v = d[kind_id]
854 if codes not in v:
855 v[codes] = 0
857 v[codes] += count
859 if kind is not None:
860 return d[to_kind_id(kind)]
861 else:
862 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
864 def get_nfiles(self):
865 sql = '''SELECT COUNT(*) FROM files'''
866 for row in self._conn.execute(sql):
867 return row[0]
869 def get_nnuts(self):
870 sql = '''SELECT COUNT(*) FROM nuts'''
871 for row in self._conn.execute(sql):
872 return row[0]
874 def get_nnuts_by_file(self):
875 return list(self.iter_nnuts_by_file())
877 def get_total_size(self):
878 sql = '''
879 SELECT SUM(files.size) FROM files
880 '''
882 for row in self._conn.execute(sql):
883 return row[0] or 0
885 def get_persistent_names(self):
886 sql = '''
887 SELECT name FROM persistent
888 '''
889 return [row[0] for row in self._conn.execute(sql)]
891 def get_stats(self):
892 return DatabaseStats(
893 nfiles=self.get_nfiles(),
894 nnuts=self.get_nnuts(),
895 kinds=self.get_kinds(),
896 codes=self.get_codes(),
897 counts=self.get_counts(),
898 total_size=self.get_total_size(),
899 persistent=self.get_persistent_names())
901 def __str__(self):
902 return str(self.get_stats())
904 def print_tables(self, stream=None):
905 for table in [
906 'persistent',
907 'files',
908 'nuts',
909 'kind_codes',
910 'kind_codes_count']:
912 self.print_table(table, stream=stream)
914 def print_table(self, name, stream=None):
916 if stream is None:
917 stream = sys.stdout
919 class hstr(str):
920 def __repr__(self):
921 return self
923 w = stream.write
924 w('\n')
925 w('\n')
926 w(name)
927 w('\n')
928 sql = 'SELECT * FROM %s' % name
929 tab = []
930 if name in self._tables:
931 headers = self._tables[name]
932 tab.append([None for _ in headers])
933 tab.append([hstr(x[0]) for x in headers])
934 tab.append([hstr(x[1]) for x in headers])
935 tab.append([None for _ in headers])
937 for row in self._conn.execute(sql):
938 tab.append([x for x in row])
940 widths = [
941 max((len(repr(x)) if x is not None else 0) for x in col)
942 for col in zip(*tab)]
944 for row in tab:
945 w(' '.join(
946 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-'))
947 for (x, wid) in zip(row, widths)))
949 w('\n')
951 w('\n')
954class DatabaseStats(Object):
955 '''
956 Container to hold statistics about contents cached in meta-information db.
957 '''
959 nfiles = Int.T(
960 help='Number of files in database.')
961 nnuts = Int.T(
962 help='Number of index nuts in database.')
963 codes = List.T(
964 Tuple.T(content_t=String.T()),
965 help='Available code sequences in database, e.g. '
966 '(agency, network, station, location) for stations nuts.')
967 kinds = List.T(
968 String.T(),
969 help='Available content types in database.')
970 total_size = Int.T(
971 help='Aggregated file size [bytes] of files referenced in database.')
972 counts = Dict.T(
973 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
974 help='Breakdown of how many nuts of any content type and code '
975 'sequence are available in database, ``counts[kind][codes]``.')
976 persistent = List.T(
977 String.T(),
978 help='Names of persistent selections stored in database.')
980 def __str__(self):
981 kind_counts = dict(
982 (kind, sum(self.counts[kind].values())) for kind in self.kinds)
984 codes = [c.safe_str for c in self.codes]
986 if len(codes) > 20:
987 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \
988 + '\n [%i more]\n' % (len(codes) - 20) \
989 + util.ewrap(codes[-10:], indent=' ')
990 else:
991 scodes = '\n' + util.ewrap(codes, indent=' ') \
992 if codes else '<none>'
994 s = '''
995Available codes: %s
996Number of files: %i
997Total size of known files: %s
998Number of index nuts: %i
999Available content kinds: %s
1000Persistent selections: %s''' % (
1001 scodes,
1002 self.nfiles,
1003 util.human_bytesize(self.total_size),
1004 self.nnuts,
1005 ', '.join('%s: %i' % (
1006 kind, kind_counts[kind]) for kind in sorted(self.kinds)),
1007 ', '.join(self.persistent))
1009 return s
1012__all__ = [
1013 'Database',
1014 'DatabaseStats',
1015]