1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8import logging 

9import sqlite3 

10import re 

11import time 

12 

13from pyrocko.io.io_common import FileLoadError 

14from pyrocko import util 

15from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

16from . import error, io 

17from .model import Nut, to_kind_id, to_kind, to_codes_simple, \ 

18 codes_patterns_for_kind 

19from .error import SquirrelError 

20 

21logger = logging.getLogger('psq.database') 

22 

23guts_prefix = 'squirrel' 

24 

25 

26def abspath(path): 

27 if not path.startswith('virtual:') and not path.startswith('client:'): 

28 return os.path.abspath(path) 

29 else: 

30 return path 

31 

32 

33def versiontuple(s): 

34 fill = [0, 0, 0] 

35 vals = [int(x) for x in s.split('.')] 

36 fill[:len(vals)] = vals 

37 return tuple(fill) 

38 

39 

40class ExecuteGet1Error(SquirrelError): 

41 pass 

42 

43 

44def execute_get1(connection, sql, args=()): 

45 rows = list(connection.execute(sql, args)) 

46 if len(rows) == 1: 

47 return rows[0] 

48 else: 

49 raise ExecuteGet1Error('Expected database entry not found.') 

50 

51 

52g_databases = {} 

53 

54 

55def get_database(path): 

56 path = os.path.abspath(path) 

57 if path not in g_databases: 

58 g_databases[path] = Database(path) 

59 

60 return g_databases[path] 

61 

62 

63def close_database(database): 

64 path = os.path.abspath(database._database_path) 

65 database._conn.close() 

66 if path in g_databases: 

67 del g_databases[path] 

68 

69 

70class Transaction(object): 

71 def __init__( 

72 self, conn, 

73 label='', 

74 mode='immediate', 

75 retry_interval=0.1, 

76 callback=None): 

77 

78 self.cursor = conn.cursor() 

79 assert mode in ('deferred', 'immediate', 'exclusive') 

80 self.mode = mode 

81 self.depth = 0 

82 self.rollback_wanted = False 

83 self.retry_interval = retry_interval 

84 self.callback = callback 

85 self.label = label 

86 self.started = False 

87 

88 def begin(self): 

89 if self.depth == 0: 

90 tries = 0 

91 while True: 

92 try: 

93 tries += 1 

94 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

95 self.started = True 

96 logger.debug( 

97 'Transaction started: %-30s (pid: %s, mode: %s)' 

98 % (self.label, os.getpid(), self.mode)) 

99 

100 self.total_changes_begin \ 

101 = self.cursor.connection.total_changes 

102 break 

103 

104 except sqlite3.OperationalError as e: 

105 if not str(e) == 'database is locked': 

106 raise 

107 

108 logger.info( 

109 'Database is locked retrying in %s s: %s ' 

110 '(pid: %s, tries: %i)' % ( 

111 self.retry_interval, self.label, 

112 os.getpid(), tries)) 

113 

114 time.sleep(self.retry_interval) 

115 

116 self.depth += 1 

117 

118 def commit(self): 

119 if not self.started: 

120 raise Exception( 

121 'Trying to commit without having started a transaction.') 

122 

123 self.depth -= 1 

124 if self.depth == 0: 

125 if not self.rollback_wanted: 

126 self.cursor.execute('COMMIT') 

127 self.started = False 

128 if self.total_changes_begin is not None: 

129 total_changes = self.cursor.connection.total_changes \ 

130 - self.total_changes_begin 

131 else: 

132 total_changes = None 

133 

134 if self.callback is not None and total_changes: 

135 self.callback('modified', total_changes) 

136 

137 logger.debug( 

138 'Transaction completed: %-30s ' 

139 '(pid: %s, changes: %i)' % ( 

140 self.label, os.getpid(), total_changes or 0)) 

141 

142 else: 

143 self.cursor.execute('ROLLBACK') 

144 self.started = False 

145 logger.warning('Deferred rollback executed.') 

146 logger.debug( 

147 'Transaction failed: %-30s (pid: %s)' % ( 

148 self.label, os.getpid())) 

149 self.rollback_wanted = False 

150 

151 def rollback(self): 

152 if not self.started: 

153 raise Exception( 

154 'Trying to rollback without having started a transaction.') 

155 

156 self.depth -= 1 

157 if self.depth == 0: 

158 self.cursor.execute('ROLLBACK') 

159 self.started = False 

160 

161 logger.debug( 

162 'Transaction failed: %-30s (pid: %s)' % ( 

163 self.label, os.getpid())) 

164 

165 self.rollback_wanted = False 

166 else: 

167 logger.warning('Deferred rollback scheduled.') 

168 self.rollback_wanted = True 

169 

170 def close(self): 

171 self.cursor.close() 

172 

173 def __enter__(self): 

174 self.begin() 

175 return self.cursor 

176 

177 def __exit__(self, exc_type, exc_value, traceback): 

178 if exc_type is None: 

179 self.commit() 

180 else: 

181 self.rollback() 

182 

183 if self.depth == 0: 

184 self.close() 

185 self.callback = None 

186 

187 

188class Database(object): 

189 ''' 

190 Shared meta-information database used by Squirrel. 

191 ''' 

192 

193 def __init__(self, database_path=':memory:', log_statements=False): 

194 self._database_path = database_path 

195 if database_path != ':memory:': 

196 util.ensuredirs(database_path) 

197 

198 try: 

199 logger.debug('Opening connection to database: %s' % database_path) 

200 self._conn = sqlite3.connect(database_path, isolation_level=None) 

201 except sqlite3.OperationalError: 

202 raise error.SquirrelError( 

203 'Cannot connect to database: %s' % database_path) 

204 

205 self._conn.text_factory = str 

206 self._tables = {} 

207 

208 if log_statements: 

209 self._conn.set_trace_callback(self._log_statement) 

210 

211 self._listeners = [] 

212 self._initialize_db() 

213 self._basepath = None 

214 

215 self.version = None 

216 

217 def set_basepath(self, basepath): 

218 if basepath is not None: 

219 self._basepath = os.path.abspath(basepath) 

220 else: 

221 self._basepath = None 

222 

223 def relpath(self, path): 

224 if self._basepath is not None and path.startswith( 

225 self._basepath + os.path.sep): 

226 return path[len(self._basepath) + 1:] 

227 else: 

228 return path 

229 

230 def abspath(self, path): 

231 if self._basepath is not None and not path.startswith('virtual:') \ 

232 and not path.startswith('client:') \ 

233 and not os.path.isabs(path): 

234 return os.path.join(self._basepath, path) 

235 else: 

236 return path 

237 

238 def _log_statement(self, statement): 

239 logger.debug(statement) 

240 

241 def get_connection(self): 

242 return self._conn 

243 

244 def transaction(self, label='', mode='immediate'): 

245 return Transaction( 

246 self._conn, 

247 label=label, 

248 mode=mode, 

249 callback=self._notify_listeners) 

250 

251 def add_listener(self, listener): 

252 listener_ref = util.smart_weakref(listener) 

253 self._listeners.append(listener_ref) 

254 return listener_ref 

255 

256 def remove_listener(self, listener_ref): 

257 self._listeners.remove(listener_ref) 

258 

259 def _notify_listeners(self, event, *args): 

260 dead = [] 

261 for listener_ref in self._listeners: 

262 listener = listener_ref() 

263 if listener is not None: 

264 listener(event, *args) 

265 else: 

266 dead.append(listener_ref) 

267 

268 for listener_ref in dead: 

269 self.remove_listener(listener_ref) 

270 

271 def _register_table(self, s): 

272 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

273 table_name = m.group(1) 

274 dtypes = m.group(2) 

275 table_header = [] 

276 for dele in dtypes.split(','): 

277 table_header.append(dele.split()[:2]) 

278 

279 self._tables[table_name] = table_header 

280 

281 return s 

282 

283 def _initialize_db(self): 

284 with self.transaction('initialize') as cursor: 

285 cursor.execute( 

286 '''PRAGMA recursive_triggers = true''') 

287 

288 cursor.execute( 

289 '''PRAGMA busy_timeout = 30000''') 

290 

291 if 2 == len(list( 

292 cursor.execute( 

293 ''' 

294 SELECT name FROM sqlite_master 

295 WHERE type = 'table' AND name IN ( 

296 'files', 

297 'persistent') 

298 '''))): 

299 

300 try: 

301 self.version = versiontuple(execute_get1( 

302 cursor, 

303 ''' 

304 SELECT value FROM settings 

305 WHERE key == "version" 

306 ''')[0]) 

307 except sqlite3.OperationalError: 

308 raise error.SquirrelError( 

309 'Squirrel database in pre-release format found: %s\n' 

310 'Please remove the database file and reindex.' 

311 % self._database_path) 

312 

313 if self.version >= (1, 1, 0): 

314 raise error.SquirrelError( 

315 'Squirrel database "%s" is of version %i.%i.%i which ' 

316 'is not supported by this version of Pyrocko. Please ' 

317 'upgrade the Pyrocko library.' 

318 % ((self._database_path, ) + self.version)) 

319 

320 return 

321 

322 cursor.execute(self._register_table( 

323 ''' 

324 CREATE TABLE IF NOT EXISTS settings ( 

325 key text PRIMARY KEY, 

326 value text) 

327 ''')) 

328 

329 cursor.execute( 

330 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")') 

331 

332 self.version = execute_get1( 

333 cursor, 

334 'SELECT value FROM settings WHERE key == "version"') 

335 

336 cursor.execute(self._register_table( 

337 ''' 

338 CREATE TABLE IF NOT EXISTS files ( 

339 file_id integer PRIMARY KEY, 

340 path text, 

341 format text, 

342 mtime float, 

343 size integer) 

344 ''')) 

345 

346 cursor.execute( 

347 ''' 

348 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

349 ON files (path) 

350 ''') 

351 

352 cursor.execute(self._register_table( 

353 ''' 

354 CREATE TABLE IF NOT EXISTS nuts ( 

355 nut_id integer PRIMARY KEY AUTOINCREMENT, 

356 file_id integer, 

357 file_segment integer, 

358 file_element integer, 

359 kind_id integer, 

360 kind_codes_id integer, 

361 tmin_seconds integer, 

362 tmin_offset integer, 

363 tmax_seconds integer, 

364 tmax_offset integer, 

365 kscale integer) 

366 ''')) 

367 

368 cursor.execute( 

369 ''' 

370 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

371 ON nuts (file_id, file_segment, file_element) 

372 ''') 

373 

374 cursor.execute(self._register_table( 

375 ''' 

376 CREATE TABLE IF NOT EXISTS kind_codes ( 

377 kind_codes_id integer PRIMARY KEY, 

378 kind_id integer, 

379 codes text, 

380 deltat float) 

381 ''')) 

382 

383 cursor.execute( 

384 ''' 

385 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

386 ON kind_codes (kind_id, codes, deltat) 

387 ''') 

388 

389 cursor.execute(self._register_table( 

390 ''' 

391 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

392 kind_codes_id integer PRIMARY KEY, 

393 count integer) 

394 ''')) 

395 

396 cursor.execute( 

397 ''' 

398 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

399 ON nuts (file_id) 

400 ''') 

401 

402 cursor.execute( 

403 ''' 

404 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

405 BEFORE DELETE ON files FOR EACH ROW 

406 BEGIN 

407 DELETE FROM nuts where file_id == old.file_id; 

408 END 

409 ''') 

410 

411 # trigger only on size to make silent update of mtime possible 

412 cursor.execute( 

413 ''' 

414 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

415 BEFORE UPDATE OF size ON files FOR EACH ROW 

416 BEGIN 

417 DELETE FROM nuts where file_id == old.file_id; 

418 END 

419 ''') 

420 

421 cursor.execute( 

422 ''' 

423 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

424 BEFORE INSERT ON nuts FOR EACH ROW 

425 BEGIN 

426 INSERT OR IGNORE INTO kind_codes_count 

427 VALUES (new.kind_codes_id, 0); 

428 UPDATE kind_codes_count 

429 SET count = count + 1 

430 WHERE new.kind_codes_id == kind_codes_id; 

431 END 

432 ''') 

433 

434 cursor.execute( 

435 ''' 

436 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

437 BEFORE DELETE ON nuts FOR EACH ROW 

438 BEGIN 

439 UPDATE kind_codes_count 

440 SET count = count - 1 

441 WHERE old.kind_codes_id == kind_codes_id; 

442 END 

443 ''') 

444 

445 cursor.execute(self._register_table( 

446 ''' 

447 CREATE TABLE IF NOT EXISTS persistent ( 

448 name text UNIQUE) 

449 ''')) 

450 

451 def dig(self, nuts, transaction=None): 

452 ''' 

453 Store or update content meta-information. 

454 

455 Given ``nuts`` are assumed to represent an up-to-date and complete 

456 inventory of a set of files. Any old information about these files is 

457 first pruned from the database (via database triggers). If such content 

458 is part of a live selection, it is also removed there. Then the new 

459 content meta-information is inserted into the main database. The 

460 content is not automatically inserted into the live selections again. 

461 It is in the responsibility of the selection object to perform this 

462 step. 

463 ''' 

464 

465 nuts = list(nuts) 

466 

467 if not nuts: 

468 return 

469 

470 files = set() 

471 kind_codes = set() 

472 for nut in nuts: 

473 files.add(( 

474 self.relpath(nut.file_path), 

475 nut.file_format, 

476 nut.file_mtime, 

477 nut.file_size)) 

478 kind_codes.add( 

479 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0)) 

480 

481 with (transaction or self.transaction('dig')) as c: 

482 

483 c.executemany( 

484 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

485 

486 c.executemany( 

487 '''UPDATE files SET 

488 format = ?, mtime = ?, size = ? 

489 WHERE path == ? 

490 ''', 

491 ((x[1], x[2], x[3], x[0]) for x in files)) 

492 

493 c.executemany( 

494 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

495 kind_codes) 

496 

497 c.executemany( 

498 ''' 

499 INSERT INTO nuts VALUES 

500 (NULL, ( 

501 SELECT file_id FROM files 

502 WHERE path == ? 

503 ),?,?,?, 

504 ( 

505 SELECT kind_codes_id FROM kind_codes 

506 WHERE kind_id == ? AND codes == ? AND deltat == ? 

507 ), ?,?,?,?,?) 

508 ''', 

509 ((self.relpath(nut.file_path), 

510 nut.file_segment, nut.file_element, 

511 nut.kind_id, 

512 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0, 

513 nut.tmin_seconds, nut.tmin_offset, 

514 nut.tmax_seconds, nut.tmax_offset, 

515 nut.kscale) for nut in nuts)) 

516 

517 def undig(self, path): 

518 

519 path = self.relpath(abspath(path)) 

520 

521 sql = ''' 

522 SELECT 

523 files.path, 

524 files.format, 

525 files.mtime, 

526 files.size, 

527 nuts.file_segment, 

528 nuts.file_element, 

529 kind_codes.kind_id, 

530 kind_codes.codes, 

531 nuts.tmin_seconds, 

532 nuts.tmin_offset, 

533 nuts.tmax_seconds, 

534 nuts.tmax_offset, 

535 kind_codes.deltat 

536 FROM files 

537 INNER JOIN nuts ON files.file_id = nuts.file_id 

538 INNER JOIN kind_codes 

539 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

540 WHERE path == ? 

541 ''' 

542 

543 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

544 for row in self._conn.execute(sql, (path,))] 

545 

546 def undig_all(self): 

547 sql = ''' 

548 SELECT 

549 files.path, 

550 files.format, 

551 files.mtime, 

552 files.size, 

553 nuts.file_segment, 

554 nuts.file_element, 

555 kind_codes.kind_id, 

556 kind_codes.codes, 

557 nuts.tmin_seconds, 

558 nuts.tmin_offset, 

559 nuts.tmax_seconds, 

560 nuts.tmax_offset, 

561 kind_codes.deltat 

562 FROM files 

563 INNER JOIN nuts ON files.file_id == nuts.file_id 

564 INNER JOIN kind_codes 

565 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

566 ''' 

567 

568 nuts = [] 

569 path = None 

570 for values in self._conn.execute(sql): 

571 if path is not None and values[0] != path: 

572 yield path, nuts 

573 nuts = [] 

574 

575 path = self.abspath(values[0]) 

576 

577 if values[1] is not None: 

578 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

579 

580 if path is not None: 

581 yield path, nuts 

582 

583 def undig_few(self, paths, format='detect'): 

584 for path in paths: 

585 nuts = self.undig(path) 

586 if nuts: 

587 yield (nuts[0].file_format, path), nuts 

588 else: 

589 yield (format, path), [] 

590 

591 def undig_many(self, paths, show_progress=True): 

592 selection = self.new_selection(paths, show_progress=show_progress) 

593 

594 for (_, path), nuts in selection.undig_grouped(): 

595 yield path, nuts 

596 

597 del selection 

598 

599 def new_selection(self, paths=None, format='detect', show_progress=True): 

600 from .selection import Selection 

601 selection = Selection(self) 

602 if paths: 

603 selection.add(paths, format=format, show_progress=show_progress) 

604 return selection 

605 

606 def undig_content(self, nut): 

607 return None 

608 

609 def remove(self, path): 

610 ''' 

611 Prune content meta-information about a given file. 

612 

613 All content pieces belonging to file ``path`` are removed from the 

614 main database and any attached live selections (via database triggers). 

615 ''' 

616 

617 path = self.relpath(abspath(path)) 

618 

619 with self.transaction('remove file') as cursor: 

620 cursor.execute( 

621 'DELETE FROM files WHERE path = ?', (path,)) 

622 

623 def remove_glob(self, pattern): 

624 ''' 

625 Prune content meta-information about files matching given pattern. 

626 

627 All content pieces belonging to files who's pathes match the given 

628 ``pattern`` are removed from the main database and any attached live 

629 selections (via database triggers). 

630 ''' 

631 

632 with self.transaction('remove file glob') as cursor: 

633 return cursor.execute( 

634 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

635 

636 def _remove_volatile(self): 

637 ''' 

638 Prune leftover volatile content from database. 

639 

640 If the cleanup handler of an attached selection is not called, e.g. due 

641 to a crash or terminated process, volatile content will not be removed 

642 properly. This method will delete such leftover entries. 

643 

644 This is a mainenance operatation which should only be called when no 

645 apps are using the database because it would remove volatile content 

646 currently used by the apps. 

647 ''' 

648 

649 with self.transaction('remove volatile') as cursor: 

650 return cursor.execute( 

651 ''' 

652 DELETE FROM files 

653 WHERE path LIKE 'virtual:volatile:%' 

654 ''').rowcount 

655 

656 def reset(self, path, transaction=None): 

657 ''' 

658 Prune information associated with a given file, but keep the file path. 

659 

660 This method is called when reading a file failed. File attributes, 

661 format, size and modification time are set to NULL. File content 

662 meta-information is removed from the database and any attached live 

663 selections (via database triggers). 

664 ''' 

665 

666 path = self.relpath(abspath(path)) 

667 

668 with (transaction or self.transaction('reset file')) as cursor: 

669 cursor.execute( 

670 ''' 

671 UPDATE files SET 

672 format = NULL, 

673 mtime = NULL, 

674 size = NULL 

675 WHERE path = ? 

676 ''', (path,)) 

677 

678 def silent_touch(self, path): 

679 ''' 

680 Update modification time of file without initiating reindexing. 

681 

682 Useful to prolong validity period of data with expiration date. 

683 ''' 

684 

685 apath = abspath(path) 

686 path = self.relpath(apath) 

687 

688 with self.transaction('silent touch') as cursor: 

689 

690 sql = 'SELECT format, size FROM files WHERE path = ?' 

691 fmt, size = execute_get1(cursor, sql, (path,)) 

692 

693 mod = io.get_backend(fmt) 

694 mod.touch(apath) 

695 file_stats = mod.get_stats(apath) 

696 

697 if file_stats[1] != size: 

698 raise FileLoadError( 

699 'Silent update for file "%s" failed: size has changed.' 

700 % apath) 

701 

702 sql = ''' 

703 UPDATE files 

704 SET mtime = ? 

705 WHERE path = ? 

706 ''' 

707 cursor.execute(sql, (file_stats[0], path)) 

708 

709 def _iter_codes_info( 

710 self, kind=None, codes=None, kind_codes_count='kind_codes_count'): 

711 

712 args = [] 

713 sel = '' 

714 if kind is not None: 

715 kind_id = to_kind_id(kind) 

716 

717 sel = 'AND kind_codes.kind_id == ?' 

718 args.append(to_kind_id(kind)) 

719 

720 if codes is not None: 

721 assert kind is not None # TODO supp by recursing possible kinds 

722 kind_id = to_kind_id(kind) 

723 pats = codes_patterns_for_kind(kind_id, codes) 

724 

725 if pats: 

726 # could optimize this by using IN for non-patterns 

727 sel += ' AND ( %s ) ' % ' OR '.join( 

728 ('kind_codes.codes GLOB ?',) * len(pats)) 

729 

730 args.extend(pat.safe_str for pat in pats) 

731 

732 sql = (''' 

733 SELECT 

734 kind_codes.kind_id, 

735 kind_codes.codes, 

736 kind_codes.deltat, 

737 kind_codes.kind_codes_id, 

738 %(kind_codes_count)s.count 

739 FROM %(kind_codes_count)s 

740 INNER JOIN kind_codes 

741 ON %(kind_codes_count)s.kind_codes_id 

742 == kind_codes.kind_codes_id 

743 WHERE %(kind_codes_count)s.count > 0 

744 ''' + sel + ''' 

745 ''') % {'kind_codes_count': kind_codes_count} 

746 

747 for kind_id, scodes, deltat, kcid, count in self._conn.execute( 

748 sql, args): 

749 

750 yield ( 

751 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count) 

752 

753 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

754 args = [] 

755 sel = '' 

756 if kind is not None: 

757 assert isinstance(kind, str) 

758 sel = 'AND kind_codes.kind_id == ?' 

759 args.append(to_kind_id(kind)) 

760 

761 sql = (''' 

762 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

763 INNER JOIN kind_codes 

764 ON %(kind_codes_count)s.kind_codes_id 

765 == kind_codes.kind_codes_id 

766 WHERE %(kind_codes_count)s.count > 0 

767 ''' + sel + ''' 

768 ORDER BY kind_codes.deltat 

769 ''') % {'kind_codes_count': kind_codes_count} 

770 

771 for row in self._conn.execute(sql, args): 

772 yield row[0] 

773 

774 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

775 args = [] 

776 sel = '' 

777 if kind is not None: 

778 assert isinstance(kind, str) 

779 sel = 'AND kind_codes.kind_id == ?' 

780 args.append(to_kind_id(kind)) 

781 

782 sql = (''' 

783 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes 

784 FROM %(kind_codes_count)s 

785 INNER JOIN kind_codes 

786 ON %(kind_codes_count)s.kind_codes_id 

787 == kind_codes.kind_codes_id 

788 WHERE %(kind_codes_count)s.count > 0 

789 ''' + sel + ''' 

790 ORDER BY kind_codes.codes 

791 

792 ''') % dict(kind_codes_count=kind_codes_count) 

793 

794 for row in self._conn.execute(sql, args): 

795 yield to_codes_simple(*row) 

796 

797 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

798 args = [] 

799 sel = '' 

800 if codes is not None: 

801 sel = 'AND kind_codes.codes == ?' 

802 args.append(codes.safe_str) 

803 

804 sql = (''' 

805 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

806 INNER JOIN kind_codes 

807 ON %(kind_codes_count)s.kind_codes_id 

808 == kind_codes.kind_codes_id 

809 WHERE %(kind_codes_count)s.count > 0 

810 ''' + sel + ''' 

811 ORDER BY kind_codes.kind_id 

812 ''') % {'kind_codes_count': kind_codes_count} 

813 

814 for row in self._conn.execute(sql, args): 

815 yield to_kind(row[0]) 

816 

817 def iter_paths(self): 

818 for row in self._conn.execute('''SELECT path FROM files'''): 

819 yield self.abspath(row[0]) 

820 

821 def iter_nnuts_by_file(self): 

822 sql = ''' 

823 SELECT 

824 path, 

825 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

826 FROM files 

827 ''' 

828 for row in self._conn.execute(sql): 

829 yield (self.abspath(row[0]),) + row[1:] 

830 

831 def iter_kinds(self, codes=None): 

832 return self._iter_kinds(codes=codes) 

833 

834 def iter_codes(self, kind=None): 

835 return self._iter_codes(kind=kind) 

836 

837 def get_paths(self): 

838 return list(self.iter_paths()) 

839 

840 def get_kinds(self, codes=None): 

841 return list(self.iter_kinds(codes=codes)) 

842 

843 def get_codes(self, kind=None): 

844 return list(self.iter_codes(kind=kind)) 

845 

846 def get_counts(self, kind=None): 

847 d = {} 

848 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

849 if kind_id not in d: 

850 v = d[kind_id] = {} 

851 else: 

852 v = d[kind_id] 

853 

854 if codes not in v: 

855 v[codes] = 0 

856 

857 v[codes] += count 

858 

859 if kind is not None: 

860 return d[to_kind_id(kind)] 

861 else: 

862 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

863 

864 def get_nfiles(self): 

865 sql = '''SELECT COUNT(*) FROM files''' 

866 for row in self._conn.execute(sql): 

867 return row[0] 

868 

869 def get_nnuts(self): 

870 sql = '''SELECT COUNT(*) FROM nuts''' 

871 for row in self._conn.execute(sql): 

872 return row[0] 

873 

874 def get_nnuts_by_file(self): 

875 return list(self.iter_nnuts_by_file()) 

876 

877 def get_total_size(self): 

878 sql = ''' 

879 SELECT SUM(files.size) FROM files 

880 ''' 

881 

882 for row in self._conn.execute(sql): 

883 return row[0] or 0 

884 

885 def get_persistent_names(self): 

886 sql = ''' 

887 SELECT name FROM persistent 

888 ''' 

889 return [row[0] for row in self._conn.execute(sql)] 

890 

891 def get_stats(self): 

892 return DatabaseStats( 

893 nfiles=self.get_nfiles(), 

894 nnuts=self.get_nnuts(), 

895 kinds=self.get_kinds(), 

896 codes=self.get_codes(), 

897 counts=self.get_counts(), 

898 total_size=self.get_total_size(), 

899 persistent=self.get_persistent_names()) 

900 

901 def __str__(self): 

902 return str(self.get_stats()) 

903 

904 def print_tables(self, stream=None): 

905 for table in [ 

906 'persistent', 

907 'files', 

908 'nuts', 

909 'kind_codes', 

910 'kind_codes_count']: 

911 

912 self.print_table(table, stream=stream) 

913 

914 def print_table(self, name, stream=None): 

915 

916 if stream is None: 

917 stream = sys.stdout 

918 

919 class hstr(str): 

920 def __repr__(self): 

921 return self 

922 

923 w = stream.write 

924 w('\n') 

925 w('\n') 

926 w(name) 

927 w('\n') 

928 sql = 'SELECT * FROM %s' % name 

929 tab = [] 

930 if name in self._tables: 

931 headers = self._tables[name] 

932 tab.append([None for _ in headers]) 

933 tab.append([hstr(x[0]) for x in headers]) 

934 tab.append([hstr(x[1]) for x in headers]) 

935 tab.append([None for _ in headers]) 

936 

937 for row in self._conn.execute(sql): 

938 tab.append([x for x in row]) 

939 

940 widths = [ 

941 max((len(repr(x)) if x is not None else 0) for x in col) 

942 for col in zip(*tab)] 

943 

944 for row in tab: 

945 w(' '.join( 

946 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

947 for (x, wid) in zip(row, widths))) 

948 

949 w('\n') 

950 

951 w('\n') 

952 

953 

954class DatabaseStats(Object): 

955 ''' 

956 Container to hold statistics about contents cached in meta-information db. 

957 ''' 

958 

959 nfiles = Int.T( 

960 help='Number of files in database.') 

961 nnuts = Int.T( 

962 help='Number of index nuts in database.') 

963 codes = List.T( 

964 Tuple.T(content_t=String.T()), 

965 help='Available code sequences in database, e.g. ' 

966 '(agency, network, station, location) for stations nuts.') 

967 kinds = List.T( 

968 String.T(), 

969 help='Available content types in database.') 

970 total_size = Int.T( 

971 help='Aggregated file size [bytes] of files referenced in database.') 

972 counts = Dict.T( 

973 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

974 help='Breakdown of how many nuts of any content type and code ' 

975 'sequence are available in database, ``counts[kind][codes]``.') 

976 persistent = List.T( 

977 String.T(), 

978 help='Names of persistent selections stored in database.') 

979 

980 def __str__(self): 

981 kind_counts = dict( 

982 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

983 

984 codes = [c.safe_str for c in self.codes] 

985 

986 if len(codes) > 20: 

987 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

988 + '\n [%i more]\n' % (len(codes) - 20) \ 

989 + util.ewrap(codes[-10:], indent=' ') 

990 else: 

991 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

992 if codes else '<none>' 

993 

994 s = ''' 

995Available codes: %s 

996Number of files: %i 

997Total size of known files: %s 

998Number of index nuts: %i 

999Available content kinds: %s 

1000Persistent selections: %s''' % ( 

1001 scodes, 

1002 self.nfiles, 

1003 util.human_bytesize(self.total_size), 

1004 self.nnuts, 

1005 ', '.join('%s: %i' % ( 

1006 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

1007 ', '.join(self.persistent)) 

1008 

1009 return s 

1010 

1011 

1012__all__ = [ 

1013 'Database', 

1014 'DatabaseStats', 

1015]