1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10import logging 

11import sqlite3 

12import re 

13import time 

14import types 

15import weakref 

16 

17from pyrocko.io.io_common import FileLoadError 

18from pyrocko import util 

19from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

20from . import error, io 

21from .model import Nut, to_kind_id, to_kind, to_codes_simple, \ 

22 codes_patterns_for_kind 

23from .error import SquirrelError 

24 

25logger = logging.getLogger('psq.database') 

26 

27guts_prefix = 'squirrel' 

28 

29 

30def abspath(path): 

31 if not path.startswith('virtual:') and not path.startswith('client:'): 

32 return os.path.abspath(path) 

33 else: 

34 return path 

35 

36 

37def versiontuple(s): 

38 fill = [0, 0, 0] 

39 vals = [int(x) for x in s.split('.')] 

40 fill[:len(vals)] = vals 

41 return tuple(fill) 

42 

43 

44class ExecuteGet1Error(SquirrelError): 

45 pass 

46 

47 

48def execute_get1(connection, sql, args=()): 

49 rows = list(connection.execute(sql, args)) 

50 if len(rows) == 1: 

51 return rows[0] 

52 else: 

53 raise ExecuteGet1Error('Expected database entry not found.') 

54 

55 

56g_databases = {} 

57 

58 

59def get_database(path): 

60 path = os.path.abspath(path) 

61 if path not in g_databases: 

62 g_databases[path] = Database(path) 

63 

64 return g_databases[path] 

65 

66 

67def close_database(database): 

68 path = os.path.abspath(database._database_path) 

69 database._conn.close() 

70 if path in g_databases: 

71 del g_databases[path] 

72 

73 

74class Transaction(object): 

75 def __init__( 

76 self, conn, 

77 label='', 

78 mode='immediate', 

79 retry_interval=0.1, 

80 callback=None): 

81 

82 self.cursor = conn.cursor() 

83 assert mode in ('deferred', 'immediate', 'exclusive') 

84 self.mode = mode 

85 self.depth = 0 

86 self.rollback_wanted = False 

87 self.retry_interval = retry_interval 

88 self.callback = callback 

89 self.label = label 

90 self.started = False 

91 

92 def begin(self): 

93 if self.depth == 0: 

94 tries = 0 

95 while True: 

96 try: 

97 tries += 1 

98 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

99 self.started = True 

100 logger.debug( 

101 'Transaction started: %-30s (pid: %s, mode: %s)' 

102 % (self.label, os.getpid(), self.mode)) 

103 

104 self.total_changes_begin \ 

105 = self.cursor.connection.total_changes 

106 break 

107 

108 except sqlite3.OperationalError as e: 

109 if not str(e) == 'database is locked': 

110 raise 

111 

112 logger.info( 

113 'Database is locked retrying in %s s: %s ' 

114 '(pid: %s, tries: %i)' % ( 

115 self.retry_interval, self.label, 

116 os.getpid(), tries)) 

117 

118 time.sleep(self.retry_interval) 

119 

120 self.depth += 1 

121 

122 def commit(self): 

123 if not self.started: 

124 raise Exception( 

125 'Trying to commit without having started a transaction.') 

126 

127 self.depth -= 1 

128 if self.depth == 0: 

129 if not self.rollback_wanted: 

130 self.cursor.execute('COMMIT') 

131 self.started = False 

132 if self.total_changes_begin is not None: 

133 total_changes = self.cursor.connection.total_changes \ 

134 - self.total_changes_begin 

135 else: 

136 total_changes = None 

137 

138 if self.callback is not None and total_changes: 

139 self.callback('modified', total_changes) 

140 

141 logger.debug( 

142 'Transaction completed: %-30s ' 

143 '(pid: %s, changes: %i)' % ( 

144 self.label, os.getpid(), total_changes or 0)) 

145 

146 else: 

147 self.cursor.execute('ROLLBACK') 

148 self.started = False 

149 logger.warning('Deferred rollback executed.') 

150 logger.debug( 

151 'Transaction failed: %-30s (pid: %s)' % ( 

152 self.label, os.getpid())) 

153 self.rollback_wanted = False 

154 

155 def rollback(self): 

156 if not self.started: 

157 raise Exception( 

158 'Trying to rollback without having started a transaction.') 

159 

160 self.depth -= 1 

161 if self.depth == 0: 

162 self.cursor.execute('ROLLBACK') 

163 self.started = False 

164 

165 logger.debug( 

166 'Transaction failed: %-30s (pid: %s)' % ( 

167 self.label, os.getpid())) 

168 

169 self.rollback_wanted = False 

170 else: 

171 logger.warning('Deferred rollback scheduled.') 

172 self.rollback_wanted = True 

173 

174 def close(self): 

175 self.cursor.close() 

176 

177 def __enter__(self): 

178 self.begin() 

179 return self.cursor 

180 

181 def __exit__(self, exc_type, exc_value, traceback): 

182 if exc_type is None: 

183 self.commit() 

184 else: 

185 self.rollback() 

186 

187 if self.depth == 0: 

188 self.close() 

189 self.callback = None 

190 

191 

192class Database(object): 

193 ''' 

194 Shared meta-information database used by Squirrel. 

195 ''' 

196 

197 def __init__(self, database_path=':memory:', log_statements=False): 

198 self._database_path = database_path 

199 if database_path != ':memory:': 

200 util.ensuredirs(database_path) 

201 

202 try: 

203 logger.debug('Opening connection to database: %s' % database_path) 

204 self._conn = sqlite3.connect(database_path, isolation_level=None) 

205 except sqlite3.OperationalError: 

206 raise error.SquirrelError( 

207 'Cannot connect to database: %s' % database_path) 

208 

209 self._conn.text_factory = str 

210 self._tables = {} 

211 

212 if log_statements: 

213 self._conn.set_trace_callback(self._log_statement) 

214 

215 self._listeners = [] 

216 self._initialize_db() 

217 self._basepath = None 

218 

219 self.version = None 

220 

221 def set_basepath(self, basepath): 

222 if basepath is not None: 

223 self._basepath = os.path.abspath(basepath) 

224 else: 

225 self._basepath = None 

226 

227 def relpath(self, path): 

228 if self._basepath is not None and path.startswith( 

229 self._basepath + os.path.sep): 

230 return path[len(self._basepath) + 1:] 

231 else: 

232 return path 

233 

234 def abspath(self, path): 

235 if self._basepath is not None and not path.startswith('virtual:') \ 

236 and not path.startswith('client:') \ 

237 and not os.path.isabs(path): 

238 return os.path.join(self._basepath, path) 

239 else: 

240 return path 

241 

242 def _log_statement(self, statement): 

243 logger.debug(statement) 

244 

245 def get_connection(self): 

246 return self._conn 

247 

248 def transaction(self, label='', mode='immediate'): 

249 return Transaction( 

250 self._conn, 

251 label=label, 

252 mode=mode, 

253 callback=self._notify_listeners) 

254 

255 def add_listener(self, listener): 

256 if isinstance(listener, types.MethodType): 

257 listener_ref = weakref.WeakMethod(listener) 

258 else: 

259 listener_ref = weakref.ref(listener) 

260 

261 self._listeners.append(listener_ref) 

262 return listener_ref 

263 

264 def remove_listener(self, listener_ref): 

265 self._listeners.remove(listener_ref) 

266 

267 def _notify_listeners(self, event, *args): 

268 dead = [] 

269 for listener_ref in self._listeners: 

270 listener = listener_ref() 

271 if listener is not None: 

272 listener(event, *args) 

273 else: 

274 dead.append(listener_ref) 

275 

276 for listener_ref in dead: 

277 self.remove_listener(listener_ref) 

278 

279 def _register_table(self, s): 

280 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

281 table_name = m.group(1) 

282 dtypes = m.group(2) 

283 table_header = [] 

284 for dele in dtypes.split(','): 

285 table_header.append(dele.split()[:2]) 

286 

287 self._tables[table_name] = table_header 

288 

289 return s 

290 

291 def _initialize_db(self): 

292 with self.transaction('initialize') as cursor: 

293 cursor.execute( 

294 '''PRAGMA recursive_triggers = true''') 

295 

296 cursor.execute( 

297 '''PRAGMA busy_timeout = 30000''') 

298 

299 if 2 == len(list( 

300 cursor.execute( 

301 ''' 

302 SELECT name FROM sqlite_master 

303 WHERE type = 'table' AND name IN ( 

304 'files', 

305 'persistent') 

306 '''))): 

307 

308 try: 

309 self.version = versiontuple(execute_get1( 

310 cursor, 

311 ''' 

312 SELECT value FROM settings 

313 WHERE key == "version" 

314 ''')[0]) 

315 except sqlite3.OperationalError: 

316 raise error.SquirrelError( 

317 'Squirrel database in pre-release format found: %s\n' 

318 'Please remove the database file and reindex.' 

319 % self._database_path) 

320 

321 if self.version >= (1, 1, 0): 

322 raise error.SquirrelError( 

323 'Squirrel database "%s" is of version %i.%i.%i which ' 

324 'is not supported by this version of Pyrocko. Please ' 

325 'upgrade the Pyrocko library.' 

326 % ((self._database_path, ) + self.version)) 

327 

328 return 

329 

330 cursor.execute(self._register_table( 

331 ''' 

332 CREATE TABLE IF NOT EXISTS settings ( 

333 key text PRIMARY KEY, 

334 value text) 

335 ''')) 

336 

337 cursor.execute( 

338 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")') 

339 

340 self.version = execute_get1( 

341 cursor, 

342 'SELECT value FROM settings WHERE key == "version"') 

343 

344 cursor.execute(self._register_table( 

345 ''' 

346 CREATE TABLE IF NOT EXISTS files ( 

347 file_id integer PRIMARY KEY, 

348 path text, 

349 format text, 

350 mtime float, 

351 size integer) 

352 ''')) 

353 

354 cursor.execute( 

355 ''' 

356 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

357 ON files (path) 

358 ''') 

359 

360 cursor.execute(self._register_table( 

361 ''' 

362 CREATE TABLE IF NOT EXISTS nuts ( 

363 nut_id integer PRIMARY KEY AUTOINCREMENT, 

364 file_id integer, 

365 file_segment integer, 

366 file_element integer, 

367 kind_id integer, 

368 kind_codes_id integer, 

369 tmin_seconds integer, 

370 tmin_offset integer, 

371 tmax_seconds integer, 

372 tmax_offset integer, 

373 kscale integer) 

374 ''')) 

375 

376 cursor.execute( 

377 ''' 

378 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

379 ON nuts (file_id, file_segment, file_element) 

380 ''') 

381 

382 cursor.execute(self._register_table( 

383 ''' 

384 CREATE TABLE IF NOT EXISTS kind_codes ( 

385 kind_codes_id integer PRIMARY KEY, 

386 kind_id integer, 

387 codes text, 

388 deltat float) 

389 ''')) 

390 

391 cursor.execute( 

392 ''' 

393 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

394 ON kind_codes (kind_id, codes, deltat) 

395 ''') 

396 

397 cursor.execute(self._register_table( 

398 ''' 

399 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

400 kind_codes_id integer PRIMARY KEY, 

401 count integer) 

402 ''')) 

403 

404 cursor.execute( 

405 ''' 

406 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

407 ON nuts (file_id) 

408 ''') 

409 

410 cursor.execute( 

411 ''' 

412 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

413 BEFORE DELETE ON files FOR EACH ROW 

414 BEGIN 

415 DELETE FROM nuts where file_id == old.file_id; 

416 END 

417 ''') 

418 

419 # trigger only on size to make silent update of mtime possible 

420 cursor.execute( 

421 ''' 

422 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

423 BEFORE UPDATE OF size ON files FOR EACH ROW 

424 BEGIN 

425 DELETE FROM nuts where file_id == old.file_id; 

426 END 

427 ''') 

428 

429 cursor.execute( 

430 ''' 

431 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

432 BEFORE INSERT ON nuts FOR EACH ROW 

433 BEGIN 

434 INSERT OR IGNORE INTO kind_codes_count 

435 VALUES (new.kind_codes_id, 0); 

436 UPDATE kind_codes_count 

437 SET count = count + 1 

438 WHERE new.kind_codes_id == kind_codes_id; 

439 END 

440 ''') 

441 

442 cursor.execute( 

443 ''' 

444 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

445 BEFORE DELETE ON nuts FOR EACH ROW 

446 BEGIN 

447 UPDATE kind_codes_count 

448 SET count = count - 1 

449 WHERE old.kind_codes_id == kind_codes_id; 

450 END 

451 ''') 

452 

453 cursor.execute(self._register_table( 

454 ''' 

455 CREATE TABLE IF NOT EXISTS persistent ( 

456 name text UNIQUE) 

457 ''')) 

458 

459 def dig(self, nuts, transaction=None): 

460 ''' 

461 Store or update content meta-information. 

462 

463 Given ``nuts`` are assumed to represent an up-to-date and complete 

464 inventory of a set of files. Any old information about these files is 

465 first pruned from the database (via database triggers). If such content 

466 is part of a live selection, it is also removed there. Then the new 

467 content meta-information is inserted into the main database. The 

468 content is not automatically inserted into the live selections again. 

469 It is in the responsibility of the selection object to perform this 

470 step. 

471 ''' 

472 

473 nuts = list(nuts) 

474 

475 if not nuts: 

476 return 

477 

478 files = set() 

479 kind_codes = set() 

480 for nut in nuts: 

481 files.add(( 

482 self.relpath(nut.file_path), 

483 nut.file_format, 

484 nut.file_mtime, 

485 nut.file_size)) 

486 kind_codes.add( 

487 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0)) 

488 

489 with (transaction or self.transaction('dig')) as c: 

490 

491 c.executemany( 

492 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

493 

494 c.executemany( 

495 '''UPDATE files SET 

496 format = ?, mtime = ?, size = ? 

497 WHERE path == ? 

498 ''', 

499 ((x[1], x[2], x[3], x[0]) for x in files)) 

500 

501 c.executemany( 

502 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

503 kind_codes) 

504 

505 c.executemany( 

506 ''' 

507 INSERT INTO nuts VALUES 

508 (NULL, ( 

509 SELECT file_id FROM files 

510 WHERE path == ? 

511 ),?,?,?, 

512 ( 

513 SELECT kind_codes_id FROM kind_codes 

514 WHERE kind_id == ? AND codes == ? AND deltat == ? 

515 ), ?,?,?,?,?) 

516 ''', 

517 ((self.relpath(nut.file_path), 

518 nut.file_segment, nut.file_element, 

519 nut.kind_id, 

520 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0, 

521 nut.tmin_seconds, nut.tmin_offset, 

522 nut.tmax_seconds, nut.tmax_offset, 

523 nut.kscale) for nut in nuts)) 

524 

525 def undig(self, path): 

526 

527 path = self.relpath(abspath(path)) 

528 

529 sql = ''' 

530 SELECT 

531 files.path, 

532 files.format, 

533 files.mtime, 

534 files.size, 

535 nuts.file_segment, 

536 nuts.file_element, 

537 kind_codes.kind_id, 

538 kind_codes.codes, 

539 nuts.tmin_seconds, 

540 nuts.tmin_offset, 

541 nuts.tmax_seconds, 

542 nuts.tmax_offset, 

543 kind_codes.deltat 

544 FROM files 

545 INNER JOIN nuts ON files.file_id = nuts.file_id 

546 INNER JOIN kind_codes 

547 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

548 WHERE path == ? 

549 ''' 

550 

551 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

552 for row in self._conn.execute(sql, (path,))] 

553 

554 def undig_all(self): 

555 sql = ''' 

556 SELECT 

557 files.path, 

558 files.format, 

559 files.mtime, 

560 files.size, 

561 nuts.file_segment, 

562 nuts.file_element, 

563 kind_codes.kind_id, 

564 kind_codes.codes, 

565 nuts.tmin_seconds, 

566 nuts.tmin_offset, 

567 nuts.tmax_seconds, 

568 nuts.tmax_offset, 

569 kind_codes.deltat 

570 FROM files 

571 INNER JOIN nuts ON files.file_id == nuts.file_id 

572 INNER JOIN kind_codes 

573 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

574 ''' 

575 

576 nuts = [] 

577 path = None 

578 for values in self._conn.execute(sql): 

579 if path is not None and values[0] != path: 

580 yield path, nuts 

581 nuts = [] 

582 

583 path = self.abspath(values[0]) 

584 

585 if values[1] is not None: 

586 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

587 

588 if path is not None: 

589 yield path, nuts 

590 

591 def undig_few(self, paths, format='detect'): 

592 for path in paths: 

593 nuts = self.undig(path) 

594 if nuts: 

595 yield (nuts[0].file_format, path), nuts 

596 else: 

597 yield (format, path), [] 

598 

599 def undig_many(self, paths, show_progress=True): 

600 selection = self.new_selection(paths, show_progress=show_progress) 

601 

602 for (_, path), nuts in selection.undig_grouped(): 

603 yield path, nuts 

604 

605 del selection 

606 

607 def new_selection(self, paths=None, format='detect', show_progress=True): 

608 from .selection import Selection 

609 selection = Selection(self) 

610 if paths: 

611 selection.add(paths, format=format, show_progress=show_progress) 

612 return selection 

613 

614 def undig_content(self, nut): 

615 return None 

616 

617 def remove(self, path): 

618 ''' 

619 Prune content meta-information about a given file. 

620 

621 All content pieces belonging to file ``path`` are removed from the 

622 main database and any attached live selections (via database triggers). 

623 ''' 

624 

625 path = self.relpath(abspath(path)) 

626 

627 with self.transaction('remove file') as cursor: 

628 cursor.execute( 

629 'DELETE FROM files WHERE path = ?', (path,)) 

630 

631 def remove_glob(self, pattern): 

632 ''' 

633 Prune content meta-information about files matching given pattern. 

634 

635 All content pieces belonging to files who's pathes match the given 

636 ``pattern`` are removed from the main database and any attached live 

637 selections (via database triggers). 

638 ''' 

639 

640 with self.transaction('remove file glob') as cursor: 

641 return cursor.execute( 

642 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

643 

644 def _remove_volatile(self): 

645 ''' 

646 Prune leftover volatile content from database. 

647 

648 If the cleanup handler of an attached selection is not called, e.g. due 

649 to a crash or terminated process, volatile content will not be removed 

650 properly. This method will delete such leftover entries. 

651 

652 This is a mainenance operatation which should only be called when no 

653 apps are using the database because it would remove volatile content 

654 currently used by the apps. 

655 ''' 

656 

657 with self.transaction('remove volatile') as cursor: 

658 return cursor.execute( 

659 ''' 

660 DELETE FROM files 

661 WHERE path LIKE 'virtual:volatile:%' 

662 ''').rowcount 

663 

664 def reset(self, path, transaction=None): 

665 ''' 

666 Prune information associated with a given file, but keep the file path. 

667 

668 This method is called when reading a file failed. File attributes, 

669 format, size and modification time are set to NULL. File content 

670 meta-information is removed from the database and any attached live 

671 selections (via database triggers). 

672 ''' 

673 

674 path = self.relpath(abspath(path)) 

675 

676 with (transaction or self.transaction('reset file')) as cursor: 

677 cursor.execute( 

678 ''' 

679 UPDATE files SET 

680 format = NULL, 

681 mtime = NULL, 

682 size = NULL 

683 WHERE path = ? 

684 ''', (path,)) 

685 

686 def silent_touch(self, path): 

687 ''' 

688 Update modification time of file without initiating reindexing. 

689 

690 Useful to prolong validity period of data with expiration date. 

691 ''' 

692 

693 apath = abspath(path) 

694 path = self.relpath(apath) 

695 

696 with self.transaction('silent touch') as cursor: 

697 

698 sql = 'SELECT format, size FROM files WHERE path = ?' 

699 fmt, size = execute_get1(cursor, sql, (path,)) 

700 

701 mod = io.get_backend(fmt) 

702 mod.touch(apath) 

703 file_stats = mod.get_stats(apath) 

704 

705 if file_stats[1] != size: 

706 raise FileLoadError( 

707 'Silent update for file "%s" failed: size has changed.' 

708 % apath) 

709 

710 sql = ''' 

711 UPDATE files 

712 SET mtime = ? 

713 WHERE path = ? 

714 ''' 

715 cursor.execute(sql, (file_stats[0], path)) 

716 

717 def _iter_codes_info( 

718 self, kind=None, codes=None, kind_codes_count='kind_codes_count'): 

719 

720 args = [] 

721 sel = '' 

722 if kind is not None: 

723 kind_id = to_kind_id(kind) 

724 

725 sel = 'AND kind_codes.kind_id == ?' 

726 args.append(to_kind_id(kind)) 

727 

728 if codes is not None: 

729 assert kind is not None # TODO supp by recursing possible kinds 

730 kind_id = to_kind_id(kind) 

731 pats = codes_patterns_for_kind(kind_id, codes) 

732 

733 if pats: 

734 # could optimize this by using IN for non-patterns 

735 sel += ' AND ( %s ) ' % ' OR '.join( 

736 ('kind_codes.codes GLOB ?',) * len(pats)) 

737 

738 args.extend(pat.safe_str for pat in pats) 

739 

740 sql = (''' 

741 SELECT 

742 kind_codes.kind_id, 

743 kind_codes.codes, 

744 kind_codes.deltat, 

745 kind_codes.kind_codes_id, 

746 %(kind_codes_count)s.count 

747 FROM %(kind_codes_count)s 

748 INNER JOIN kind_codes 

749 ON %(kind_codes_count)s.kind_codes_id 

750 == kind_codes.kind_codes_id 

751 WHERE %(kind_codes_count)s.count > 0 

752 ''' + sel + ''' 

753 ''') % {'kind_codes_count': kind_codes_count} 

754 

755 for kind_id, scodes, deltat, kcid, count in self._conn.execute( 

756 sql, args): 

757 

758 yield ( 

759 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count) 

760 

761 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

762 args = [] 

763 sel = '' 

764 if kind is not None: 

765 assert isinstance(kind, str) 

766 sel = 'AND kind_codes.kind_id == ?' 

767 args.append(to_kind_id(kind)) 

768 

769 sql = (''' 

770 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

771 INNER JOIN kind_codes 

772 ON %(kind_codes_count)s.kind_codes_id 

773 == kind_codes.kind_codes_id 

774 WHERE %(kind_codes_count)s.count > 0 

775 ''' + sel + ''' 

776 ORDER BY kind_codes.deltat 

777 ''') % {'kind_codes_count': kind_codes_count} 

778 

779 for row in self._conn.execute(sql, args): 

780 yield row[0] 

781 

782 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

783 args = [] 

784 sel = '' 

785 if kind is not None: 

786 assert isinstance(kind, str) 

787 sel = 'AND kind_codes.kind_id == ?' 

788 args.append(to_kind_id(kind)) 

789 

790 sql = (''' 

791 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes 

792 FROM %(kind_codes_count)s 

793 INNER JOIN kind_codes 

794 ON %(kind_codes_count)s.kind_codes_id 

795 == kind_codes.kind_codes_id 

796 WHERE %(kind_codes_count)s.count > 0 

797 ''' + sel + ''' 

798 ORDER BY kind_codes.codes 

799 

800 ''') % dict(kind_codes_count=kind_codes_count) 

801 

802 for row in self._conn.execute(sql, args): 

803 yield to_codes_simple(*row) 

804 

805 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

806 args = [] 

807 sel = '' 

808 if codes is not None: 

809 sel = 'AND kind_codes.codes == ?' 

810 args.append(codes.safe_str) 

811 

812 sql = (''' 

813 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

814 INNER JOIN kind_codes 

815 ON %(kind_codes_count)s.kind_codes_id 

816 == kind_codes.kind_codes_id 

817 WHERE %(kind_codes_count)s.count > 0 

818 ''' + sel + ''' 

819 ORDER BY kind_codes.kind_id 

820 ''') % {'kind_codes_count': kind_codes_count} 

821 

822 for row in self._conn.execute(sql, args): 

823 yield to_kind(row[0]) 

824 

825 def iter_paths(self): 

826 for row in self._conn.execute('''SELECT path FROM files'''): 

827 yield self.abspath(row[0]) 

828 

829 def iter_nnuts_by_file(self): 

830 sql = ''' 

831 SELECT 

832 path, 

833 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

834 FROM files 

835 ''' 

836 for row in self._conn.execute(sql): 

837 yield (self.abspath(row[0]),) + row[1:] 

838 

839 def iter_kinds(self, codes=None): 

840 return self._iter_kinds(codes=codes) 

841 

842 def iter_codes(self, kind=None): 

843 return self._iter_codes(kind=kind) 

844 

845 def get_paths(self): 

846 return list(self.iter_paths()) 

847 

848 def get_kinds(self, codes=None): 

849 return list(self.iter_kinds(codes=codes)) 

850 

851 def get_codes(self, kind=None): 

852 return list(self.iter_codes(kind=kind)) 

853 

854 def get_counts(self, kind=None): 

855 d = {} 

856 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

857 if kind_id not in d: 

858 v = d[kind_id] = {} 

859 else: 

860 v = d[kind_id] 

861 

862 if codes not in v: 

863 v[codes] = 0 

864 

865 v[codes] += count 

866 

867 if kind is not None: 

868 return d[to_kind_id(kind)] 

869 else: 

870 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

871 

872 def get_nfiles(self): 

873 sql = '''SELECT COUNT(*) FROM files''' 

874 for row in self._conn.execute(sql): 

875 return row[0] 

876 

877 def get_nnuts(self): 

878 sql = '''SELECT COUNT(*) FROM nuts''' 

879 for row in self._conn.execute(sql): 

880 return row[0] 

881 

882 def get_nnuts_by_file(self): 

883 return list(self.iter_nnuts_by_file()) 

884 

885 def get_total_size(self): 

886 sql = ''' 

887 SELECT SUM(files.size) FROM files 

888 ''' 

889 

890 for row in self._conn.execute(sql): 

891 return row[0] or 0 

892 

893 def get_persistent_names(self): 

894 sql = ''' 

895 SELECT name FROM persistent 

896 ''' 

897 return [row[0] for row in self._conn.execute(sql)] 

898 

899 def get_stats(self): 

900 return DatabaseStats( 

901 nfiles=self.get_nfiles(), 

902 nnuts=self.get_nnuts(), 

903 kinds=self.get_kinds(), 

904 codes=self.get_codes(), 

905 counts=self.get_counts(), 

906 total_size=self.get_total_size(), 

907 persistent=self.get_persistent_names()) 

908 

909 def __str__(self): 

910 return str(self.get_stats()) 

911 

912 def print_tables(self, stream=None): 

913 for table in [ 

914 'persistent', 

915 'files', 

916 'nuts', 

917 'kind_codes', 

918 'kind_codes_count']: 

919 

920 self.print_table(table, stream=stream) 

921 

922 def print_table(self, name, stream=None): 

923 

924 if stream is None: 

925 stream = sys.stdout 

926 

927 class hstr(str): 

928 def __repr__(self): 

929 return self 

930 

931 w = stream.write 

932 w('\n') 

933 w('\n') 

934 w(name) 

935 w('\n') 

936 sql = 'SELECT * FROM %s' % name 

937 tab = [] 

938 if name in self._tables: 

939 headers = self._tables[name] 

940 tab.append([None for _ in headers]) 

941 tab.append([hstr(x[0]) for x in headers]) 

942 tab.append([hstr(x[1]) for x in headers]) 

943 tab.append([None for _ in headers]) 

944 

945 for row in self._conn.execute(sql): 

946 tab.append([x for x in row]) 

947 

948 widths = [ 

949 max((len(repr(x)) if x is not None else 0) for x in col) 

950 for col in zip(*tab)] 

951 

952 for row in tab: 

953 w(' '.join( 

954 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

955 for (x, wid) in zip(row, widths))) 

956 

957 w('\n') 

958 

959 w('\n') 

960 

961 

962class DatabaseStats(Object): 

963 ''' 

964 Container to hold statistics about contents cached in meta-information db. 

965 ''' 

966 

967 nfiles = Int.T( 

968 help='Number of files in database.') 

969 nnuts = Int.T( 

970 help='Number of index nuts in database.') 

971 codes = List.T( 

972 Tuple.T(content_t=String.T()), 

973 help='Available code sequences in database, e.g. ' 

974 '(agency, network, station, location) for stations nuts.') 

975 kinds = List.T( 

976 String.T(), 

977 help='Available content types in database.') 

978 total_size = Int.T( 

979 help='Aggregated file size [bytes] of files referenced in database.') 

980 counts = Dict.T( 

981 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

982 help='Breakdown of how many nuts of any content type and code ' 

983 'sequence are available in database, ``counts[kind][codes]``.') 

984 persistent = List.T( 

985 String.T(), 

986 help='Names of persistent selections stored in database.') 

987 

988 def __str__(self): 

989 kind_counts = dict( 

990 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

991 

992 codes = [c.safe_str for c in self.codes] 

993 

994 if len(codes) > 20: 

995 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

996 + '\n [%i more]\n' % (len(codes) - 20) \ 

997 + util.ewrap(codes[-10:], indent=' ') 

998 else: 

999 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

1000 if codes else '<none>' 

1001 

1002 s = ''' 

1003Available codes: %s 

1004Number of files: %i 

1005Total size of known files: %s 

1006Number of index nuts: %i 

1007Available content kinds: %s 

1008Persistent selections: %s''' % ( 

1009 scodes, 

1010 self.nfiles, 

1011 util.human_bytesize(self.total_size), 

1012 self.nnuts, 

1013 ', '.join('%s: %i' % ( 

1014 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

1015 ', '.join(self.persistent)) 

1016 

1017 return s 

1018 

1019 

1020__all__ = [ 

1021 'Database', 

1022 'DatabaseStats', 

1023]