1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10import logging 

11import sqlite3 

12import re 

13import time 

14import types 

15import weakref 

16 

17from pyrocko.io.io_common import FileLoadError 

18from pyrocko import util 

19from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

20from . import error, io 

21from .model import Nut, to_kind_id, to_kind, to_codes_simple 

22from .error import SquirrelError 

23 

24logger = logging.getLogger('psq.database') 

25 

26guts_prefix = 'squirrel' 

27 

28 

29def abspath(path): 

30 if not path.startswith('virtual:') and not path.startswith('client:'): 

31 return os.path.abspath(path) 

32 else: 

33 return path 

34 

35 

36class ExecuteGet1Error(SquirrelError): 

37 pass 

38 

39 

40def execute_get1(connection, sql, args=()): 

41 rows = list(connection.execute(sql, args)) 

42 if len(rows) == 1: 

43 return rows[0] 

44 else: 

45 raise ExecuteGet1Error('Expected database entry not found.') 

46 

47 

48g_databases = {} 

49 

50 

51def get_database(path): 

52 path = os.path.abspath(path) 

53 if path not in g_databases: 

54 g_databases[path] = Database(path) 

55 

56 return g_databases[path] 

57 

58 

59def close_database(database): 

60 path = os.path.abspath(database._database_path) 

61 database._conn.close() 

62 if path in g_databases: 

63 del g_databases[path] 

64 

65 

66class Transaction(object): 

67 def __init__( 

68 self, conn, 

69 label='', 

70 mode='immediate', 

71 retry_interval=0.1, 

72 callback=None): 

73 

74 self.cursor = conn.cursor() 

75 assert mode in ('deferred', 'immediate', 'exclusive') 

76 self.mode = mode 

77 self.depth = 0 

78 self.rollback_wanted = False 

79 self.retry_interval = retry_interval 

80 self.callback = callback 

81 self.label = label 

82 self.started = False 

83 

84 def begin(self): 

85 if self.depth == 0: 

86 tries = 0 

87 while True: 

88 try: 

89 tries += 1 

90 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

91 self.started = True 

92 logger.debug( 

93 'Transaction started: %-30s (pid: %s, mode: %s)' 

94 % (self.label, os.getpid(), self.mode)) 

95 

96 self.total_changes_begin \ 

97 = self.cursor.connection.total_changes 

98 break 

99 

100 except sqlite3.OperationalError as e: 

101 if not str(e) == 'database is locked': 

102 raise 

103 

104 logger.info( 

105 'Database is locked retrying in %s s: %s ' 

106 '(pid: %s, tries: %i)' % ( 

107 self.retry_interval, self.label, 

108 os.getpid(), tries)) 

109 

110 time.sleep(self.retry_interval) 

111 

112 self.depth += 1 

113 

114 def commit(self): 

115 if not self.started: 

116 raise Exception( 

117 'Trying to commit without having started a transaction.') 

118 

119 self.depth -= 1 

120 if self.depth == 0: 

121 if not self.rollback_wanted: 

122 self.cursor.execute('COMMIT') 

123 self.started = False 

124 if self.total_changes_begin is not None: 

125 total_changes = self.cursor.connection.total_changes \ 

126 - self.total_changes_begin 

127 else: 

128 total_changes = None 

129 

130 if self.callback is not None and total_changes: 

131 self.callback('modified', total_changes) 

132 

133 logger.debug( 

134 'Transaction completed: %-30s ' 

135 '(pid: %s, changes: %i)' % ( 

136 self.label, os.getpid(), total_changes or 0)) 

137 

138 else: 

139 self.cursor.execute('ROLLBACK') 

140 self.started = False 

141 logger.warning('Deferred rollback executed.') 

142 logger.debug( 

143 'Transaction failed: %-30s (pid: %s)' % ( 

144 self.label, os.getpid())) 

145 self.rollback_wanted = False 

146 

147 def rollback(self): 

148 if not self.started: 

149 raise Exception( 

150 'Trying to rollback without having started a transaction.') 

151 

152 self.depth -= 1 

153 if self.depth == 0: 

154 self.cursor.execute('ROLLBACK') 

155 self.started = False 

156 

157 logger.debug( 

158 'Transaction failed: %-30s (pid: %s)' % ( 

159 self.label, os.getpid())) 

160 

161 self.rollback_wanted = False 

162 else: 

163 logger.warning('Deferred rollback scheduled.') 

164 self.rollback_wanted = True 

165 

166 def close(self): 

167 self.cursor.close() 

168 

169 def __enter__(self): 

170 self.begin() 

171 return self.cursor 

172 

173 def __exit__(self, exc_type, exc_value, traceback): 

174 if exc_type is None: 

175 self.commit() 

176 else: 

177 self.rollback() 

178 

179 if self.depth == 0: 

180 self.close() 

181 self.callback = None 

182 

183 

184class Database(object): 

185 ''' 

186 Shared meta-information database used by Squirrel. 

187 ''' 

188 

189 def __init__(self, database_path=':memory:', log_statements=False): 

190 self._database_path = database_path 

191 if database_path != ':memory:': 

192 util.ensuredirs(database_path) 

193 

194 try: 

195 logger.debug('Opening connection to database: %s' % database_path) 

196 self._conn = sqlite3.connect(database_path, isolation_level=None) 

197 except sqlite3.OperationalError: 

198 raise error.SquirrelError( 

199 'Cannot connect to database: %s' % database_path) 

200 

201 self._conn.text_factory = str 

202 self._tables = {} 

203 

204 if log_statements: 

205 self._conn.set_trace_callback(self._log_statement) 

206 

207 self._listeners = [] 

208 self._initialize_db() 

209 self._basepath = None 

210 

211 self.version = None 

212 

213 def set_basepath(self, basepath): 

214 if basepath is not None: 

215 self._basepath = os.path.abspath(basepath) 

216 else: 

217 self._basepath = None 

218 

219 def relpath(self, path): 

220 if self._basepath is not None and path.startswith( 

221 self._basepath + os.path.sep): 

222 return path[len(self._basepath) + 1:] 

223 else: 

224 return path 

225 

226 def abspath(self, path): 

227 if self._basepath is not None and not path.startswith('virtual:') \ 

228 and not path.startswith('client:') \ 

229 and not os.path.isabs(path): 

230 return os.path.join(self._basepath, path) 

231 else: 

232 return path 

233 

234 def _log_statement(self, statement): 

235 logger.debug(statement) 

236 

237 def get_connection(self): 

238 return self._conn 

239 

240 def transaction(self, label='', mode='immediate'): 

241 return Transaction( 

242 self._conn, 

243 label=label, 

244 mode=mode, 

245 callback=self._notify_listeners) 

246 

247 def add_listener(self, listener): 

248 if isinstance(listener, types.MethodType): 

249 listener_ref = weakref.WeakMethod(listener) 

250 else: 

251 listener_ref = weakref.ref(listener) 

252 

253 self._listeners.append(listener_ref) 

254 return listener_ref 

255 

256 def remove_listener(self, listener_ref): 

257 self._listeners.remove(listener_ref) 

258 

259 def _notify_listeners(self, event, *args): 

260 dead = [] 

261 for listener_ref in self._listeners: 

262 listener = listener_ref() 

263 if listener is not None: 

264 listener(event, *args) 

265 else: 

266 dead.append(listener_ref) 

267 

268 for listener_ref in dead: 

269 self.remove_listener(listener_ref) 

270 

271 def _register_table(self, s): 

272 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

273 table_name = m.group(1) 

274 dtypes = m.group(2) 

275 table_header = [] 

276 for dele in dtypes.split(','): 

277 table_header.append(dele.split()[:2]) 

278 

279 self._tables[table_name] = table_header 

280 

281 return s 

282 

283 def _initialize_db(self): 

284 with self.transaction('initialize') as cursor: 

285 cursor.execute( 

286 '''PRAGMA recursive_triggers = true''') 

287 

288 cursor.execute( 

289 '''PRAGMA busy_timeout = 30000''') 

290 

291 if 2 == len(list( 

292 cursor.execute( 

293 ''' 

294 SELECT name FROM sqlite_master 

295 WHERE type = 'table' AND name IN ( 

296 'files', 

297 'persistent') 

298 '''))): 

299 

300 try: 

301 self.version = execute_get1( 

302 cursor, 

303 'SELECT value FROM settings WHERE key == "version"') 

304 except sqlite3.OperationalError: 

305 raise error.SquirrelError( 

306 'Squirrel database in pre-release format found: %s\n' 

307 'Please remove the database file and reindex.' 

308 % self._database_path) 

309 

310 return 

311 

312 cursor.execute(self._register_table( 

313 ''' 

314 CREATE TABLE IF NOT EXISTS settings ( 

315 key text PRIMARY KEY, 

316 value text) 

317 ''')) 

318 

319 cursor.execute( 

320 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")') 

321 

322 self.version = execute_get1( 

323 cursor, 

324 'SELECT value FROM settings WHERE key == "version"') 

325 

326 cursor.execute(self._register_table( 

327 ''' 

328 CREATE TABLE IF NOT EXISTS files ( 

329 file_id integer PRIMARY KEY, 

330 path text, 

331 format text, 

332 mtime float, 

333 size integer) 

334 ''')) 

335 

336 cursor.execute( 

337 ''' 

338 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

339 ON files (path) 

340 ''') 

341 

342 cursor.execute(self._register_table( 

343 ''' 

344 CREATE TABLE IF NOT EXISTS nuts ( 

345 nut_id integer PRIMARY KEY AUTOINCREMENT, 

346 file_id integer, 

347 file_segment integer, 

348 file_element integer, 

349 kind_id integer, 

350 kind_codes_id integer, 

351 tmin_seconds integer, 

352 tmin_offset integer, 

353 tmax_seconds integer, 

354 tmax_offset integer, 

355 kscale integer) 

356 ''')) 

357 

358 cursor.execute( 

359 ''' 

360 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

361 ON nuts (file_id, file_segment, file_element) 

362 ''') 

363 

364 cursor.execute(self._register_table( 

365 ''' 

366 CREATE TABLE IF NOT EXISTS kind_codes ( 

367 kind_codes_id integer PRIMARY KEY, 

368 kind_id integer, 

369 codes text, 

370 deltat float) 

371 ''')) 

372 

373 cursor.execute( 

374 ''' 

375 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

376 ON kind_codes (kind_id, codes, deltat) 

377 ''') 

378 

379 cursor.execute(self._register_table( 

380 ''' 

381 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

382 kind_codes_id integer PRIMARY KEY, 

383 count integer) 

384 ''')) 

385 

386 cursor.execute( 

387 ''' 

388 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

389 ON nuts (file_id) 

390 ''') 

391 

392 cursor.execute( 

393 ''' 

394 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

395 BEFORE DELETE ON files FOR EACH ROW 

396 BEGIN 

397 DELETE FROM nuts where file_id == old.file_id; 

398 END 

399 ''') 

400 

401 # trigger only on size to make silent update of mtime possible 

402 cursor.execute( 

403 ''' 

404 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

405 BEFORE UPDATE OF size ON files FOR EACH ROW 

406 BEGIN 

407 DELETE FROM nuts where file_id == old.file_id; 

408 END 

409 ''') 

410 

411 cursor.execute( 

412 ''' 

413 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

414 BEFORE INSERT ON nuts FOR EACH ROW 

415 BEGIN 

416 INSERT OR IGNORE INTO kind_codes_count 

417 VALUES (new.kind_codes_id, 0); 

418 UPDATE kind_codes_count 

419 SET count = count + 1 

420 WHERE new.kind_codes_id == kind_codes_id; 

421 END 

422 ''') 

423 

424 cursor.execute( 

425 ''' 

426 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

427 BEFORE DELETE ON nuts FOR EACH ROW 

428 BEGIN 

429 UPDATE kind_codes_count 

430 SET count = count - 1 

431 WHERE old.kind_codes_id == kind_codes_id; 

432 END 

433 ''') 

434 

435 cursor.execute(self._register_table( 

436 ''' 

437 CREATE TABLE IF NOT EXISTS persistent ( 

438 name text UNIQUE) 

439 ''')) 

440 

441 def dig(self, nuts, transaction=None): 

442 ''' 

443 Store or update content meta-information. 

444 

445 Given ``nuts`` are assumed to represent an up-to-date and complete 

446 inventory of a set of files. Any old information about these files is 

447 first pruned from the database (via database triggers). If such content 

448 is part of a live selection, it is also removed there. Then the new 

449 content meta-information is inserted into the main database. The 

450 content is not automatically inserted into the live selections again. 

451 It is in the responsibility of the selection object to perform this 

452 step. 

453 ''' 

454 

455 nuts = list(nuts) 

456 

457 if not nuts: 

458 return 

459 

460 files = set() 

461 kind_codes = set() 

462 for nut in nuts: 

463 files.add(( 

464 self.relpath(nut.file_path), 

465 nut.file_format, 

466 nut.file_mtime, 

467 nut.file_size)) 

468 kind_codes.add( 

469 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0)) 

470 

471 with (transaction or self.transaction('dig')) as c: 

472 

473 c.executemany( 

474 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

475 

476 c.executemany( 

477 '''UPDATE files SET 

478 format = ?, mtime = ?, size = ? 

479 WHERE path == ? 

480 ''', 

481 ((x[1], x[2], x[3], x[0]) for x in files)) 

482 

483 c.executemany( 

484 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

485 kind_codes) 

486 

487 c.executemany( 

488 ''' 

489 INSERT INTO nuts VALUES 

490 (NULL, ( 

491 SELECT file_id FROM files 

492 WHERE path == ? 

493 ),?,?,?, 

494 ( 

495 SELECT kind_codes_id FROM kind_codes 

496 WHERE kind_id == ? AND codes == ? AND deltat == ? 

497 ), ?,?,?,?,?) 

498 ''', 

499 ((self.relpath(nut.file_path), 

500 nut.file_segment, nut.file_element, 

501 nut.kind_id, 

502 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0, 

503 nut.tmin_seconds, nut.tmin_offset, 

504 nut.tmax_seconds, nut.tmax_offset, 

505 nut.kscale) for nut in nuts)) 

506 

507 def undig(self, path): 

508 

509 path = self.relpath(abspath(path)) 

510 

511 sql = ''' 

512 SELECT 

513 files.path, 

514 files.format, 

515 files.mtime, 

516 files.size, 

517 nuts.file_segment, 

518 nuts.file_element, 

519 kind_codes.kind_id, 

520 kind_codes.codes, 

521 nuts.tmin_seconds, 

522 nuts.tmin_offset, 

523 nuts.tmax_seconds, 

524 nuts.tmax_offset, 

525 kind_codes.deltat 

526 FROM files 

527 INNER JOIN nuts ON files.file_id = nuts.file_id 

528 INNER JOIN kind_codes 

529 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

530 WHERE path == ? 

531 ''' 

532 

533 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

534 for row in self._conn.execute(sql, (path,))] 

535 

536 def undig_all(self): 

537 sql = ''' 

538 SELECT 

539 files.path, 

540 files.format, 

541 files.mtime, 

542 files.size, 

543 nuts.file_segment, 

544 nuts.file_element, 

545 kind_codes.kind_id, 

546 kind_codes.codes, 

547 nuts.tmin_seconds, 

548 nuts.tmin_offset, 

549 nuts.tmax_seconds, 

550 nuts.tmax_offset, 

551 kind_codes.deltat 

552 FROM files 

553 INNER JOIN nuts ON files.file_id == nuts.file_id 

554 INNER JOIN kind_codes 

555 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

556 ''' 

557 

558 nuts = [] 

559 path = None 

560 for values in self._conn.execute(sql): 

561 if path is not None and values[0] != path: 

562 yield path, nuts 

563 nuts = [] 

564 

565 path = self.abspath(values[0]) 

566 

567 if values[1] is not None: 

568 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

569 

570 if path is not None: 

571 yield path, nuts 

572 

573 def undig_few(self, paths, format='detect'): 

574 for path in paths: 

575 nuts = self.undig(path) 

576 if nuts: 

577 yield (nuts[0].file_format, path), nuts 

578 else: 

579 yield (format, path), [] 

580 

581 def undig_many(self, paths, show_progress=True): 

582 selection = self.new_selection(paths, show_progress=show_progress) 

583 

584 for (_, path), nuts in selection.undig_grouped(): 

585 yield path, nuts 

586 

587 del selection 

588 

589 def new_selection(self, paths=None, format='detect', show_progress=True): 

590 from .selection import Selection 

591 selection = Selection(self) 

592 if paths: 

593 selection.add(paths, format=format, show_progress=show_progress) 

594 return selection 

595 

596 def undig_content(self, nut): 

597 return None 

598 

599 def remove(self, path): 

600 ''' 

601 Prune content meta-information about a given file. 

602 

603 All content pieces belonging to file ``path`` are removed from the 

604 main database and any attached live selections (via database triggers). 

605 ''' 

606 

607 path = self.relpath(abspath(path)) 

608 

609 with self.transaction('remove file') as cursor: 

610 cursor.execute( 

611 'DELETE FROM files WHERE path = ?', (path,)) 

612 

613 def remove_glob(self, pattern): 

614 ''' 

615 Prune content meta-information about files matching given pattern. 

616 

617 All content pieces belonging to files who's pathes match the given 

618 ``pattern`` are removed from the main database and any attached live 

619 selections (via database triggers). 

620 ''' 

621 

622 with self.transaction('remove file glob') as cursor: 

623 return cursor.execute( 

624 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

625 

626 def _remove_volatile(self): 

627 ''' 

628 Prune leftover volatile content from database. 

629 

630 If the cleanup handler of an attached selection is not called, e.g. due 

631 to a crash or terminated process, volatile content will not be removed 

632 properly. This method will delete such leftover entries. 

633 

634 This is a mainenance operatation which should only be called when no 

635 apps are using the database because it would remove volatile content 

636 currently used by the apps. 

637 ''' 

638 

639 with self.transaction('remove volatile') as cursor: 

640 return cursor.execute( 

641 ''' 

642 DELETE FROM files 

643 WHERE path LIKE 'virtual:volatile:%' 

644 ''').rowcount 

645 

646 def reset(self, path, transaction=None): 

647 ''' 

648 Prune information associated with a given file, but keep the file path. 

649 

650 This method is called when reading a file failed. File attributes, 

651 format, size and modification time are set to NULL. File content 

652 meta-information is removed from the database and any attached live 

653 selections (via database triggers). 

654 ''' 

655 

656 path = self.relpath(abspath(path)) 

657 

658 with (transaction or self.transaction('reset file')) as cursor: 

659 cursor.execute( 

660 ''' 

661 UPDATE files SET 

662 format = NULL, 

663 mtime = NULL, 

664 size = NULL 

665 WHERE path = ? 

666 ''', (path,)) 

667 

668 def silent_touch(self, path): 

669 ''' 

670 Update modification time of file without initiating reindexing. 

671 

672 Useful to prolong validity period of data with expiration date. 

673 ''' 

674 

675 apath = abspath(path) 

676 path = self.relpath(apath) 

677 

678 with self.transaction('silent touch') as cursor: 

679 

680 sql = 'SELECT format, size FROM files WHERE path = ?' 

681 fmt, size = execute_get1(cursor, sql, (path,)) 

682 

683 mod = io.get_backend(fmt) 

684 mod.touch(apath) 

685 file_stats = mod.get_stats(apath) 

686 

687 if file_stats[1] != size: 

688 raise FileLoadError( 

689 'Silent update for file "%s" failed: size has changed.' 

690 % apath) 

691 

692 sql = ''' 

693 UPDATE files 

694 SET mtime = ? 

695 WHERE path = ? 

696 ''' 

697 cursor.execute(sql, (file_stats[0], path)) 

698 

699 def _iter_codes_info(self, kind=None, kind_codes_count='kind_codes_count'): 

700 args = [] 

701 sel = '' 

702 if kind is not None: 

703 sel = 'AND kind_codes.kind_id == ?' 

704 args.append(to_kind_id(kind)) 

705 

706 sql = (''' 

707 SELECT 

708 kind_codes.kind_id, 

709 kind_codes.codes, 

710 kind_codes.deltat, 

711 kind_codes.kind_codes_id, 

712 %(kind_codes_count)s.count 

713 FROM %(kind_codes_count)s 

714 INNER JOIN kind_codes 

715 ON %(kind_codes_count)s.kind_codes_id 

716 == kind_codes.kind_codes_id 

717 WHERE %(kind_codes_count)s.count > 0 

718 ''' + sel + ''' 

719 ''') % {'kind_codes_count': kind_codes_count} 

720 

721 for kind_id, scodes, deltat, kcid, count in self._conn.execute( 

722 sql, args): 

723 

724 yield ( 

725 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count) 

726 

727 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

728 args = [] 

729 sel = '' 

730 if kind is not None: 

731 assert isinstance(kind, str) 

732 sel = 'AND kind_codes.kind_id == ?' 

733 args.append(to_kind_id(kind)) 

734 

735 sql = (''' 

736 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

737 INNER JOIN kind_codes 

738 ON %(kind_codes_count)s.kind_codes_id 

739 == kind_codes.kind_codes_id 

740 WHERE %(kind_codes_count)s.count > 0 

741 ''' + sel + ''' 

742 ORDER BY kind_codes.deltat 

743 ''') % {'kind_codes_count': kind_codes_count} 

744 

745 for row in self._conn.execute(sql, args): 

746 yield row[0] 

747 

748 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

749 args = [] 

750 sel = '' 

751 if kind is not None: 

752 assert isinstance(kind, str) 

753 sel = 'AND kind_codes.kind_id == ?' 

754 args.append(to_kind_id(kind)) 

755 

756 sql = (''' 

757 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes 

758 FROM %(kind_codes_count)s 

759 INNER JOIN kind_codes 

760 ON %(kind_codes_count)s.kind_codes_id 

761 == kind_codes.kind_codes_id 

762 WHERE %(kind_codes_count)s.count > 0 

763 ''' + sel + ''' 

764 ORDER BY kind_codes.codes 

765 ''') % dict(kind_codes_count=kind_codes_count) 

766 

767 for row in self._conn.execute(sql, args): 

768 yield to_codes_simple(*row) 

769 

770 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

771 args = [] 

772 sel = '' 

773 if codes is not None: 

774 sel = 'AND kind_codes.codes == ?' 

775 args.append(codes.safe_str) 

776 

777 sql = (''' 

778 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

779 INNER JOIN kind_codes 

780 ON %(kind_codes_count)s.kind_codes_id 

781 == kind_codes.kind_codes_id 

782 WHERE %(kind_codes_count)s.count > 0 

783 ''' + sel + ''' 

784 ORDER BY kind_codes.kind_id 

785 ''') % {'kind_codes_count': kind_codes_count} 

786 

787 for row in self._conn.execute(sql, args): 

788 yield to_kind(row[0]) 

789 

790 def iter_paths(self): 

791 for row in self._conn.execute('''SELECT path FROM files'''): 

792 yield self.abspath(row[0]) 

793 

794 def iter_nnuts_by_file(self): 

795 sql = ''' 

796 SELECT 

797 path, 

798 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

799 FROM files 

800 ''' 

801 for row in self._conn.execute(sql): 

802 yield (self.abspath(row[0]),) + row[1:] 

803 

804 def iter_kinds(self, codes=None): 

805 return self._iter_kinds(codes=codes) 

806 

807 def iter_codes(self, kind=None): 

808 return self._iter_codes(kind=kind) 

809 

810 def get_paths(self): 

811 return list(self.iter_paths()) 

812 

813 def get_kinds(self, codes=None): 

814 return list(self.iter_kinds(codes=codes)) 

815 

816 def get_codes(self, kind=None): 

817 return list(self.iter_codes(kind=kind)) 

818 

819 def get_counts(self, kind=None): 

820 d = {} 

821 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

822 if kind_id not in d: 

823 v = d[kind_id] = {} 

824 else: 

825 v = d[kind_id] 

826 

827 if codes not in v: 

828 v[codes] = 0 

829 

830 v[codes] += count 

831 

832 if kind is not None: 

833 return d[to_kind_id(kind)] 

834 else: 

835 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

836 

837 def get_nfiles(self): 

838 sql = '''SELECT COUNT(*) FROM files''' 

839 for row in self._conn.execute(sql): 

840 return row[0] 

841 

842 def get_nnuts(self): 

843 sql = '''SELECT COUNT(*) FROM nuts''' 

844 for row in self._conn.execute(sql): 

845 return row[0] 

846 

847 def get_nnuts_by_file(self): 

848 return list(self.iter_nnuts_by_file()) 

849 

850 def get_total_size(self): 

851 sql = ''' 

852 SELECT SUM(files.size) FROM files 

853 ''' 

854 

855 for row in self._conn.execute(sql): 

856 return row[0] or 0 

857 

858 def get_persistent_names(self): 

859 sql = ''' 

860 SELECT name FROM persistent 

861 ''' 

862 return [row[0] for row in self._conn.execute(sql)] 

863 

864 def get_stats(self): 

865 return DatabaseStats( 

866 nfiles=self.get_nfiles(), 

867 nnuts=self.get_nnuts(), 

868 kinds=self.get_kinds(), 

869 codes=self.get_codes(), 

870 counts=self.get_counts(), 

871 total_size=self.get_total_size(), 

872 persistent=self.get_persistent_names()) 

873 

874 def __str__(self): 

875 return str(self.get_stats()) 

876 

877 def print_tables(self, stream=None): 

878 for table in [ 

879 'persistent', 

880 'files', 

881 'nuts', 

882 'kind_codes', 

883 'kind_codes_count']: 

884 

885 self.print_table(table, stream=stream) 

886 

887 def print_table(self, name, stream=None): 

888 

889 if stream is None: 

890 stream = sys.stdout 

891 

892 class hstr(str): 

893 def __repr__(self): 

894 return self 

895 

896 w = stream.write 

897 w('\n') 

898 w('\n') 

899 w(name) 

900 w('\n') 

901 sql = 'SELECT * FROM %s' % name 

902 tab = [] 

903 if name in self._tables: 

904 headers = self._tables[name] 

905 tab.append([None for _ in headers]) 

906 tab.append([hstr(x[0]) for x in headers]) 

907 tab.append([hstr(x[1]) for x in headers]) 

908 tab.append([None for _ in headers]) 

909 

910 for row in self._conn.execute(sql): 

911 tab.append([x for x in row]) 

912 

913 widths = [ 

914 max((len(repr(x)) if x is not None else 0) for x in col) 

915 for col in zip(*tab)] 

916 

917 for row in tab: 

918 w(' '.join( 

919 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

920 for (x, wid) in zip(row, widths))) 

921 

922 w('\n') 

923 

924 w('\n') 

925 

926 

927class DatabaseStats(Object): 

928 ''' 

929 Container to hold statistics about contents cached in meta-information db. 

930 ''' 

931 

932 nfiles = Int.T( 

933 help='Number of files in database.') 

934 nnuts = Int.T( 

935 help='Number of index nuts in database.') 

936 codes = List.T( 

937 Tuple.T(content_t=String.T()), 

938 help='Available code sequences in database, e.g. ' 

939 '(agency, network, station, location) for stations nuts.') 

940 kinds = List.T( 

941 String.T(), 

942 help='Available content types in database.') 

943 total_size = Int.T( 

944 help='Aggregated file size [bytes] of files referenced in database.') 

945 counts = Dict.T( 

946 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

947 help='Breakdown of how many nuts of any content type and code ' 

948 'sequence are available in database, ``counts[kind][codes]``.') 

949 persistent = List.T( 

950 String.T(), 

951 help='Names of persistent selections stored in database.') 

952 

953 def __str__(self): 

954 kind_counts = dict( 

955 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

956 

957 codes = [c.safe_str for c in self.codes] 

958 

959 if len(codes) > 20: 

960 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

961 + '\n [%i more]\n' % (len(codes) - 20) \ 

962 + util.ewrap(codes[-10:], indent=' ') 

963 else: 

964 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

965 if codes else '<none>' 

966 

967 s = ''' 

968Available codes: %s 

969Number of files: %i 

970Total size of known files: %s 

971Number of index nuts: %i 

972Available content kinds: %s 

973Persistent selections: %s''' % ( 

974 scodes, 

975 self.nfiles, 

976 util.human_bytesize(self.total_size), 

977 self.nnuts, 

978 ', '.join('%s: %i' % ( 

979 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

980 ', '.join(self.persistent)) 

981 

982 return s 

983 

984 

985__all__ = [ 

986 'Database', 

987 'DatabaseStats', 

988]