1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10import logging 

11import sqlite3 

12import re 

13import time 

14import types 

15import weakref 

16 

17from pyrocko.io.io_common import FileLoadError 

18from pyrocko import util 

19from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

20from . import error, io 

21from .model import Nut, to_kind_id, to_kind, separator 

22from .error import SquirrelError 

23 

24logger = logging.getLogger('psq.database') 

25 

26guts_prefix = 'squirrel' 

27 

28 

29def abspath(path): 

30 if not path.startswith('virtual:') and not path.startswith('client:'): 

31 return os.path.abspath(path) 

32 else: 

33 return path 

34 

35 

36class ExecuteGet1Error(SquirrelError): 

37 pass 

38 

39 

40def execute_get1(connection, sql, args): 

41 rows = list(connection.execute(sql, args)) 

42 if len(rows) == 1: 

43 return rows[0] 

44 else: 

45 raise ExecuteGet1Error('Expected database entry not found.') 

46 

47 

48g_databases = {} 

49 

50 

51def get_database(path): 

52 path = os.path.abspath(path) 

53 if path not in g_databases: 

54 g_databases[path] = Database(path) 

55 

56 return g_databases[path] 

57 

58 

59def close_database(database): 

60 path = os.path.abspath(database._database_path) 

61 database._conn.close() 

62 if path in g_databases: 

63 del g_databases[path] 

64 

65 

66class Transaction(object): 

67 def __init__( 

68 self, conn, 

69 label='', 

70 mode='immediate', 

71 retry_interval=0.1, 

72 callback=None): 

73 

74 self.cursor = conn.cursor() 

75 assert mode in ('deferred', 'immediate', 'exclusive') 

76 self.mode = mode 

77 self.depth = 0 

78 self.rollback_wanted = False 

79 self.retry_interval = retry_interval 

80 self.callback = callback 

81 self.label = label 

82 self.started = False 

83 

84 def begin(self): 

85 if self.depth == 0: 

86 tries = 0 

87 while True: 

88 try: 

89 tries += 1 

90 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

91 self.started = True 

92 logger.debug( 

93 'Transaction started: %-30s (pid: %s, mode: %s)' 

94 % (self.label, os.getpid(), self.mode)) 

95 

96 self.total_changes_begin \ 

97 = self.cursor.connection.total_changes 

98 break 

99 

100 except sqlite3.OperationalError as e: 

101 if not str(e) == 'database is locked': 

102 raise 

103 

104 logger.info( 

105 'Database is locked retrying in %s s: %s ' 

106 '(pid: %s, tries: %i)' % ( 

107 self.retry_interval, self.label, 

108 os.getpid(), tries)) 

109 

110 time.sleep(self.retry_interval) 

111 

112 self.depth += 1 

113 

114 def commit(self): 

115 if not self.started: 

116 raise Exception( 

117 'Trying to commit without having started a transaction.') 

118 

119 self.depth -= 1 

120 if self.depth == 0: 

121 if not self.rollback_wanted: 

122 self.cursor.execute('COMMIT') 

123 self.started = False 

124 if self.total_changes_begin is not None: 

125 total_changes = self.cursor.connection.total_changes \ 

126 - self.total_changes_begin 

127 else: 

128 total_changes = None 

129 

130 if self.callback is not None and total_changes: 

131 self.callback('modified', total_changes) 

132 

133 logger.debug( 

134 'Transaction completed: %-30s ' 

135 '(pid: %s, changes: %i)' % ( 

136 self.label, os.getpid(), total_changes or 0)) 

137 

138 else: 

139 self.cursor.execute('ROLLBACK') 

140 self.started = False 

141 logger.warning('Deferred rollback executed.') 

142 logger.debug( 

143 'Transaction failed: %-30s (pid: %s)' % ( 

144 self.label, os.getpid())) 

145 self.rollback_wanted = False 

146 

147 def rollback(self): 

148 if not self.started: 

149 raise Exception( 

150 'Trying to rollback without having started a transaction.') 

151 

152 self.depth -= 1 

153 if self.depth == 0: 

154 self.cursor.execute('ROLLBACK') 

155 self.started = False 

156 

157 logger.debug( 

158 'Transaction failed: %-30s (pid: %s)' % ( 

159 self.label, os.getpid())) 

160 

161 self.rollback_wanted = False 

162 else: 

163 logger.warning('Deferred rollback scheduled.') 

164 self.rollback_wanted = True 

165 

166 def close(self): 

167 self.cursor.close() 

168 

169 def __enter__(self): 

170 self.begin() 

171 return self.cursor 

172 

173 def __exit__(self, exc_type, exc_value, traceback): 

174 if exc_type is None: 

175 self.commit() 

176 else: 

177 self.rollback() 

178 

179 if self.depth == 0: 

180 self.close() 

181 self.callback = None 

182 

183 

184class Database(object): 

185 ''' 

186 Shared meta-information database used by Squirrel. 

187 ''' 

188 

189 def __init__(self, database_path=':memory:', log_statements=False): 

190 self._database_path = database_path 

191 if database_path != ':memory:': 

192 util.ensuredirs(database_path) 

193 

194 try: 

195 logger.debug('Opening connection to database: %s' % database_path) 

196 self._conn = sqlite3.connect(database_path, isolation_level=None) 

197 except sqlite3.OperationalError: 

198 raise error.SquirrelError( 

199 'Cannot connect to database: %s' % database_path) 

200 

201 self._conn.text_factory = str 

202 self._tables = {} 

203 

204 if log_statements: 

205 self._conn.set_trace_callback(self._log_statement) 

206 

207 self._initialize_db() 

208 self._basepath = None 

209 self._listeners = [] 

210 

211 def set_basepath(self, basepath): 

212 if basepath is not None: 

213 self._basepath = os.path.abspath(basepath) 

214 else: 

215 self._basepath = None 

216 

217 def relpath(self, path): 

218 if self._basepath is not None and path.startswith( 

219 self._basepath + os.path.sep): 

220 return path[len(self._basepath) + 1:] 

221 else: 

222 return path 

223 

224 def abspath(self, path): 

225 if self._basepath is not None and not path.startswith('virtual:') \ 

226 and not path.startswith('client:') \ 

227 and not os.path.isabs(path): 

228 return os.path.join(self._basepath, path) 

229 else: 

230 return path 

231 

232 def _log_statement(self, statement): 

233 logger.debug(statement) 

234 

235 def get_connection(self): 

236 return self._conn 

237 

238 def transaction(self, label='', mode='immediate'): 

239 return Transaction( 

240 self._conn, 

241 label=label, 

242 mode=mode, 

243 callback=self._notify_listeners) 

244 

245 def add_listener(self, listener): 

246 if isinstance(listener, types.MethodType): 

247 listener_ref = weakref.WeakMethod(listener) 

248 else: 

249 listener_ref = weakref.ref(listener) 

250 

251 self._listeners.append(listener_ref) 

252 return listener_ref 

253 

254 def remove_listener(self, listener_ref): 

255 self._listeners.remove(listener_ref) 

256 

257 def _notify_listeners(self, event, *args): 

258 dead = [] 

259 for listener_ref in self._listeners: 

260 listener = listener_ref() 

261 if listener is not None: 

262 listener(event, *args) 

263 else: 

264 dead.append(listener_ref) 

265 

266 for listener_ref in dead: 

267 self.remove_listener(listener_ref) 

268 

269 def _register_table(self, s): 

270 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

271 table_name = m.group(1) 

272 dtypes = m.group(2) 

273 table_header = [] 

274 for dele in dtypes.split(','): 

275 table_header.append(dele.split()[:2]) 

276 

277 self._tables[table_name] = table_header 

278 

279 return s 

280 

281 def _initialize_db(self): 

282 with self.transaction('initialize') as cursor: 

283 cursor.execute( 

284 '''PRAGMA recursive_triggers = true''') 

285 

286 cursor.execute( 

287 '''PRAGMA busy_timeout = 30000''') 

288 

289 if 2 == len(list( 

290 cursor.execute( 

291 ''' 

292 SELECT name FROM sqlite_master 

293 WHERE type = 'table' AND name IN ( 

294 'files', 

295 'persistent') 

296 '''))): 

297 

298 return 

299 

300 cursor.execute(self._register_table( 

301 ''' 

302 CREATE TABLE IF NOT EXISTS files ( 

303 file_id integer PRIMARY KEY, 

304 path text, 

305 format text, 

306 mtime float, 

307 size integer) 

308 ''')) 

309 

310 cursor.execute( 

311 ''' 

312 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

313 ON files (path) 

314 ''') 

315 

316 cursor.execute(self._register_table( 

317 ''' 

318 CREATE TABLE IF NOT EXISTS nuts ( 

319 nut_id integer PRIMARY KEY AUTOINCREMENT, 

320 file_id integer, 

321 file_segment integer, 

322 file_element integer, 

323 kind_id integer, 

324 kind_codes_id integer, 

325 tmin_seconds integer, 

326 tmin_offset integer, 

327 tmax_seconds integer, 

328 tmax_offset integer, 

329 kscale integer) 

330 ''')) 

331 

332 cursor.execute( 

333 ''' 

334 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

335 ON nuts (file_id, file_segment, file_element) 

336 ''') 

337 

338 cursor.execute(self._register_table( 

339 ''' 

340 CREATE TABLE IF NOT EXISTS kind_codes ( 

341 kind_codes_id integer PRIMARY KEY, 

342 kind_id integer, 

343 codes text, 

344 deltat float) 

345 ''')) 

346 

347 cursor.execute( 

348 ''' 

349 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

350 ON kind_codes (kind_id, codes, deltat) 

351 ''') 

352 

353 cursor.execute(self._register_table( 

354 ''' 

355 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

356 kind_codes_id integer PRIMARY KEY, 

357 count integer) 

358 ''')) 

359 

360 cursor.execute( 

361 ''' 

362 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

363 ON nuts (file_id) 

364 ''') 

365 

366 cursor.execute( 

367 ''' 

368 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

369 BEFORE DELETE ON files FOR EACH ROW 

370 BEGIN 

371 DELETE FROM nuts where file_id == old.file_id; 

372 END 

373 ''') 

374 

375 # trigger only on size to make silent update of mtime possible 

376 cursor.execute( 

377 ''' 

378 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

379 BEFORE UPDATE OF size ON files FOR EACH ROW 

380 BEGIN 

381 DELETE FROM nuts where file_id == old.file_id; 

382 END 

383 ''') 

384 

385 cursor.execute( 

386 ''' 

387 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

388 BEFORE INSERT ON nuts FOR EACH ROW 

389 BEGIN 

390 INSERT OR IGNORE INTO kind_codes_count 

391 VALUES (new.kind_codes_id, 0); 

392 UPDATE kind_codes_count 

393 SET count = count + 1 

394 WHERE new.kind_codes_id == kind_codes_id; 

395 END 

396 ''') 

397 

398 cursor.execute( 

399 ''' 

400 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

401 BEFORE DELETE ON nuts FOR EACH ROW 

402 BEGIN 

403 UPDATE kind_codes_count 

404 SET count = count - 1 

405 WHERE old.kind_codes_id == kind_codes_id; 

406 END 

407 ''') 

408 

409 cursor.execute(self._register_table( 

410 ''' 

411 CREATE TABLE IF NOT EXISTS persistent ( 

412 name text UNIQUE) 

413 ''')) 

414 

415 def dig(self, nuts, transaction=None): 

416 ''' 

417 Store or update content meta-information. 

418 

419 Given ``nuts`` are assumed to represent an up-to-date and complete 

420 inventory of a set of files. Any old information about these files is 

421 first pruned from the database (via database triggers). If such content 

422 is part of a live selection, it is also removed there. Then the new 

423 content meta-information is inserted into the main database. The 

424 content is not automatically inserted into the live selections again. 

425 It is in the responsibility of the selection object to perform this 

426 step. 

427 ''' 

428 

429 nuts = list(nuts) 

430 

431 if not nuts: 

432 return 

433 

434 files = set() 

435 kind_codes = set() 

436 for nut in nuts: 

437 files.add(( 

438 self.relpath(nut.file_path), 

439 nut.file_format, 

440 nut.file_mtime, 

441 nut.file_size)) 

442 kind_codes.add((nut.kind_id, nut.codes, nut.deltat or 0.0)) 

443 

444 with (transaction or self.transaction('dig')) as c: 

445 

446 c.executemany( 

447 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

448 

449 c.executemany( 

450 '''UPDATE files SET 

451 format = ?, mtime = ?, size = ? 

452 WHERE path == ? 

453 ''', 

454 ((x[1], x[2], x[3], x[0]) for x in files)) 

455 

456 c.executemany( 

457 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

458 kind_codes) 

459 

460 c.executemany( 

461 ''' 

462 INSERT INTO nuts VALUES 

463 (NULL, ( 

464 SELECT file_id FROM files 

465 WHERE path == ? 

466 ),?,?,?, 

467 ( 

468 SELECT kind_codes_id FROM kind_codes 

469 WHERE kind_id == ? AND codes == ? AND deltat == ? 

470 ), ?,?,?,?,?) 

471 ''', 

472 ((self.relpath(nut.file_path), 

473 nut.file_segment, nut.file_element, 

474 nut.kind_id, 

475 nut.kind_id, nut.codes, nut.deltat or 0.0, 

476 nut.tmin_seconds, nut.tmin_offset, 

477 nut.tmax_seconds, nut.tmax_offset, 

478 nut.kscale) for nut in nuts)) 

479 

480 def undig(self, path): 

481 

482 path = self.relpath(abspath(path)) 

483 

484 sql = ''' 

485 SELECT 

486 files.path, 

487 files.format, 

488 files.mtime, 

489 files.size, 

490 nuts.file_segment, 

491 nuts.file_element, 

492 kind_codes.kind_id, 

493 kind_codes.codes, 

494 nuts.tmin_seconds, 

495 nuts.tmin_offset, 

496 nuts.tmax_seconds, 

497 nuts.tmax_offset, 

498 kind_codes.deltat 

499 FROM files 

500 INNER JOIN nuts ON files.file_id = nuts.file_id 

501 INNER JOIN kind_codes 

502 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

503 WHERE path == ? 

504 ''' 

505 

506 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

507 for row in self._conn.execute(sql, (path,))] 

508 

509 def undig_all(self): 

510 sql = ''' 

511 SELECT 

512 files.path, 

513 files.format, 

514 files.mtime, 

515 files.size, 

516 nuts.file_segment, 

517 nuts.file_element, 

518 kind_codes.kind_id, 

519 kind_codes.codes, 

520 nuts.tmin_seconds, 

521 nuts.tmin_offset, 

522 nuts.tmax_seconds, 

523 nuts.tmax_offset, 

524 kind_codes.deltat 

525 FROM files 

526 INNER JOIN nuts ON files.file_id == nuts.file_id 

527 INNER JOIN kind_codes 

528 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

529 ''' 

530 

531 nuts = [] 

532 path = None 

533 for values in self._conn.execute(sql): 

534 if path is not None and values[0] != path: 

535 yield path, nuts 

536 nuts = [] 

537 

538 path = self.abspath(values[0]) 

539 

540 if values[1] is not None: 

541 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

542 

543 if path is not None: 

544 yield path, nuts 

545 

546 def undig_few(self, paths, format='detect'): 

547 for path in paths: 

548 nuts = self.undig(path) 

549 if nuts: 

550 yield (nuts[0].file_format, path), nuts 

551 else: 

552 yield (format, path), [] 

553 

554 def undig_many(self, paths, show_progress=True): 

555 selection = self.new_selection(paths, show_progress=show_progress) 

556 

557 for (_, path), nuts in selection.undig_grouped(): 

558 yield path, nuts 

559 

560 del selection 

561 

562 def new_selection(self, paths=None, format='detect', show_progress=True): 

563 from .selection import Selection 

564 selection = Selection(self) 

565 if paths: 

566 selection.add(paths, format=format, show_progress=show_progress) 

567 return selection 

568 

569 def undig_content(self, nut): 

570 return None 

571 

572 def remove(self, path): 

573 ''' 

574 Prune content meta-information about a given file. 

575 

576 All content pieces belonging to file ``path`` are removed from the 

577 main database and any attached live selections (via database triggers). 

578 ''' 

579 

580 path = self.relpath(abspath(path)) 

581 

582 with self.transaction('remove file') as cursor: 

583 cursor.execute( 

584 'DELETE FROM files WHERE path = ?', (path,)) 

585 

586 def remove_glob(self, pattern): 

587 ''' 

588 Prune content meta-information about files matching given pattern. 

589 

590 All content pieces belonging to files who's pathes match the given 

591 ``pattern`` are removed from the main database and any attached live 

592 selections (via database triggers). 

593 ''' 

594 

595 with self.transaction('remove file glob') as cursor: 

596 return cursor.execute( 

597 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

598 

599 def _remove_volatile(self): 

600 ''' 

601 Prune leftover volatile content from database. 

602 

603 If the cleanup handler of an attached selection is not called, e.g. due 

604 to a crash or terminated process, volatile content will not be removed 

605 properly. This method will delete such leftover entries. 

606 

607 This is a mainenance operatation which should only be called when no 

608 apps are using the database because it would remove volatile content 

609 currently used by the apps. 

610 ''' 

611 

612 with self.transaction('remove volatile') as cursor: 

613 return cursor.execute( 

614 ''' 

615 DELETE FROM files 

616 WHERE path LIKE 'virtual:volatile:%' 

617 ''').rowcount 

618 

619 def reset(self, path, transaction=None): 

620 ''' 

621 Prune information associated with a given file, but keep the file path. 

622 

623 This method is called when reading a file failed. File attributes, 

624 format, size and modification time are set to NULL. File content 

625 meta-information is removed from the database and any attached live 

626 selections (via database triggers). 

627 ''' 

628 

629 path = self.relpath(abspath(path)) 

630 

631 with (transaction or self.transaction('reset file')) as cursor: 

632 cursor.execute( 

633 ''' 

634 UPDATE files SET 

635 format = NULL, 

636 mtime = NULL, 

637 size = NULL 

638 WHERE path = ? 

639 ''', (path,)) 

640 

641 def silent_touch(self, path): 

642 ''' 

643 Update modification time of file without initiating reindexing. 

644 

645 Useful to prolong validity period of data with expiration date. 

646 ''' 

647 

648 apath = abspath(path) 

649 path = self.relpath(apath) 

650 

651 with self.transaction('silent touch') as cursor: 

652 

653 sql = 'SELECT format, size FROM files WHERE path = ?' 

654 fmt, size = execute_get1(cursor, sql, (path,)) 

655 

656 mod = io.get_backend(fmt) 

657 mod.touch(apath) 

658 file_stats = mod.get_stats(apath) 

659 

660 if file_stats[1] != size: 

661 raise FileLoadError( 

662 'Silent update for file "%s" failed: size has changed.' 

663 % apath) 

664 

665 sql = ''' 

666 UPDATE files 

667 SET mtime = ? 

668 WHERE path = ? 

669 ''' 

670 cursor.execute(sql, (file_stats[0], path)) 

671 

672 def _iter_counts(self, kind=None, kind_codes_count='kind_codes_count'): 

673 args = [] 

674 sel = '' 

675 if kind is not None: 

676 sel = 'AND kind_codes.kind_id == ?' 

677 args.append(to_kind_id(kind)) 

678 

679 sql = (''' 

680 SELECT 

681 kind_codes.kind_id, 

682 kind_codes.codes, 

683 kind_codes.deltat, 

684 %(kind_codes_count)s.count 

685 FROM %(kind_codes_count)s 

686 INNER JOIN kind_codes 

687 ON %(kind_codes_count)s.kind_codes_id 

688 == kind_codes.kind_codes_id 

689 WHERE %(kind_codes_count)s.count > 0 

690 ''' + sel + ''' 

691 ''') % {'kind_codes_count': kind_codes_count} 

692 

693 for kind_id, codes, deltat, count in self._conn.execute(sql, args): 

694 yield ( 

695 to_kind(kind_id), 

696 tuple(codes.split(separator)), 

697 deltat), count 

698 

699 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

700 args = [] 

701 sel = '' 

702 if kind is not None: 

703 assert isinstance(kind, str) 

704 sel = 'AND kind_codes.kind_id == ?' 

705 args.append(to_kind_id(kind)) 

706 

707 sql = (''' 

708 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

709 INNER JOIN kind_codes 

710 ON %(kind_codes_count)s.kind_codes_id 

711 == kind_codes.kind_codes_id 

712 WHERE %(kind_codes_count)s.count > 0 

713 ''' + sel + ''' 

714 ORDER BY kind_codes.deltat 

715 ''') % {'kind_codes_count': kind_codes_count} 

716 

717 for row in self._conn.execute(sql, args): 

718 yield row[0] 

719 

720 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

721 args = [] 

722 sel = '' 

723 if kind is not None: 

724 assert isinstance(kind, str) 

725 sel = 'AND kind_codes.kind_id == ?' 

726 args.append(to_kind_id(kind)) 

727 

728 sql = (''' 

729 SELECT DISTINCT kind_codes.codes FROM %(kind_codes_count)s 

730 INNER JOIN kind_codes 

731 ON %(kind_codes_count)s.kind_codes_id 

732 == kind_codes.kind_codes_id 

733 WHERE %(kind_codes_count)s.count > 0 

734 ''' + sel + ''' 

735 ORDER BY kind_codes.codes 

736 ''') % {'kind_codes_count': kind_codes_count} 

737 

738 for row in self._conn.execute(sql, args): 

739 yield tuple(row[0].split(separator)) 

740 

741 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

742 args = [] 

743 sel = '' 

744 if codes is not None: 

745 assert isinstance(codes, tuple) 

746 sel = 'AND kind_codes.codes == ?' 

747 args.append(separator.join(codes)) 

748 

749 sql = (''' 

750 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

751 INNER JOIN kind_codes 

752 ON %(kind_codes_count)s.kind_codes_id 

753 == kind_codes.kind_codes_id 

754 WHERE %(kind_codes_count)s.count > 0 

755 ''' + sel + ''' 

756 ORDER BY kind_codes.kind_id 

757 ''') % {'kind_codes_count': kind_codes_count} 

758 

759 for row in self._conn.execute(sql, args): 

760 yield to_kind(row[0]) 

761 

762 def iter_paths(self): 

763 for row in self._conn.execute('''SELECT path FROM files'''): 

764 yield self.abspath(row[0]) 

765 

766 def iter_nnuts_by_file(self): 

767 sql = ''' 

768 SELECT 

769 path, 

770 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

771 FROM files 

772 ''' 

773 for row in self._conn.execute(sql): 

774 yield (self.abspath(row[0]),) + row[1:] 

775 

776 def iter_kinds(self, codes=None): 

777 return self._iter_kinds(codes=codes) 

778 

779 def iter_codes(self, kind=None): 

780 return self._iter_codes(kind=kind) 

781 

782 def iter_counts(self, kind=None): 

783 return self._iter_counts(kind=kind) 

784 

785 def get_paths(self): 

786 return list(self.iter_paths()) 

787 

788 def get_kinds(self, codes=None): 

789 return list(self.iter_kinds(codes=codes)) 

790 

791 def get_codes(self, kind=None): 

792 return list(self.iter_codes(kind=kind)) 

793 

794 def get_counts(self, kind=None): 

795 d = {} 

796 for (k, codes, deltat), count in self.iter_counts(): 

797 if k not in d: 

798 v = d[k] = {} 

799 else: 

800 v = d[k] 

801 

802 if codes not in v: 

803 v[codes] = 0 

804 

805 v[codes] += count 

806 

807 if kind is not None: 

808 return d[kind] 

809 else: 

810 return d 

811 

812 def get_nfiles(self): 

813 sql = '''SELECT COUNT(*) FROM files''' 

814 for row in self._conn.execute(sql): 

815 return row[0] 

816 

817 def get_nnuts(self): 

818 sql = '''SELECT COUNT(*) FROM nuts''' 

819 for row in self._conn.execute(sql): 

820 return row[0] 

821 

822 def get_nnuts_by_file(self): 

823 return list(self.iter_nnuts_by_file()) 

824 

825 def get_total_size(self): 

826 sql = ''' 

827 SELECT SUM(files.size) FROM files 

828 ''' 

829 

830 for row in self._conn.execute(sql): 

831 return row[0] or 0 

832 

833 def get_persistent_names(self): 

834 sql = ''' 

835 SELECT name FROM persistent 

836 ''' 

837 return [row[0] for row in self._conn.execute(sql)] 

838 

839 def get_stats(self): 

840 return DatabaseStats( 

841 nfiles=self.get_nfiles(), 

842 nnuts=self.get_nnuts(), 

843 kinds=self.get_kinds(), 

844 codes=self.get_codes(), 

845 counts=self.get_counts(), 

846 total_size=self.get_total_size(), 

847 persistent=self.get_persistent_names()) 

848 

849 def __str__(self): 

850 return str(self.get_stats()) 

851 

852 def print_tables(self, stream=None): 

853 for table in [ 

854 'persistent', 

855 'files', 

856 'nuts', 

857 'kind_codes', 

858 'kind_codes_count']: 

859 

860 self.print_table(table, stream=stream) 

861 

862 def print_table(self, name, stream=None): 

863 

864 if stream is None: 

865 stream = sys.stdout 

866 

867 class hstr(str): 

868 def __repr__(self): 

869 return self 

870 

871 w = stream.write 

872 w('\n') 

873 w('\n') 

874 w(name) 

875 w('\n') 

876 sql = 'SELECT * FROM %s' % name 

877 tab = [] 

878 if name in self._tables: 

879 headers = self._tables[name] 

880 tab.append([None for _ in headers]) 

881 tab.append([hstr(x[0]) for x in headers]) 

882 tab.append([hstr(x[1]) for x in headers]) 

883 tab.append([None for _ in headers]) 

884 

885 for row in self._conn.execute(sql): 

886 tab.append([x for x in row]) 

887 

888 widths = [ 

889 max((len(repr(x)) if x is not None else 0) for x in col) 

890 for col in zip(*tab)] 

891 

892 for row in tab: 

893 w(' '.join( 

894 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

895 for (x, wid) in zip(row, widths))) 

896 

897 w('\n') 

898 

899 w('\n') 

900 

901 

902class DatabaseStats(Object): 

903 ''' 

904 Container to hold statistics about contents cached in meta-information db. 

905 ''' 

906 

907 nfiles = Int.T( 

908 help='Number of files in database.') 

909 nnuts = Int.T( 

910 help='Number of index nuts in database.') 

911 codes = List.T( 

912 Tuple.T(content_t=String.T()), 

913 help='Available code sequences in database, e.g. ' 

914 '(agency, network, station, location) for stations nuts.') 

915 kinds = List.T( 

916 String.T(), 

917 help='Available content types in database.') 

918 total_size = Int.T( 

919 help='Aggregated file size [bytes] of files referenced in database.') 

920 counts = Dict.T( 

921 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

922 help='Breakdown of how many nuts of any content type and code ' 

923 'sequence are available in database, ``counts[kind][codes]``.') 

924 persistent = List.T( 

925 String.T(), 

926 help='Names of persistent selections stored in database.') 

927 

928 def __str__(self): 

929 kind_counts = dict( 

930 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

931 

932 codes = ['.'.join(x) for x in self.codes] 

933 

934 if len(codes) > 20: 

935 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

936 + '\n [%i more]\n' % (len(codes) - 20) \ 

937 + util.ewrap(codes[-10:], indent=' ') 

938 else: 

939 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

940 if codes else '<none>' 

941 

942 s = ''' 

943Available codes: %s 

944Number of files: %i 

945Total size of known files: %s 

946Number of index nuts: %i 

947Available content kinds: %s 

948Persistent selections: %s''' % ( 

949 scodes, 

950 self.nfiles, 

951 util.human_bytesize(self.total_size), 

952 self.nnuts, 

953 ', '.join('%s: %i' % ( 

954 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

955 ', '.join(self.persistent)) 

956 

957 return s 

958 

959 

960__all__ = [ 

961 'Database', 

962 'DatabaseStats', 

963]