1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8import logging 

9import sqlite3 

10import re 

11import time 

12import types 

13import weakref 

14 

15from pyrocko.io.io_common import FileLoadError 

16from pyrocko import util 

17from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

18from . import error, io 

19from .model import Nut, to_kind_id, to_kind, to_codes_simple, \ 

20 codes_patterns_for_kind 

21from .error import SquirrelError 

22 

23logger = logging.getLogger('psq.database') 

24 

25guts_prefix = 'squirrel' 

26 

27 

28def abspath(path): 

29 if not path.startswith('virtual:') and not path.startswith('client:'): 

30 return os.path.abspath(path) 

31 else: 

32 return path 

33 

34 

35def versiontuple(s): 

36 fill = [0, 0, 0] 

37 vals = [int(x) for x in s.split('.')] 

38 fill[:len(vals)] = vals 

39 return tuple(fill) 

40 

41 

42class ExecuteGet1Error(SquirrelError): 

43 pass 

44 

45 

46def execute_get1(connection, sql, args=()): 

47 rows = list(connection.execute(sql, args)) 

48 if len(rows) == 1: 

49 return rows[0] 

50 else: 

51 raise ExecuteGet1Error('Expected database entry not found.') 

52 

53 

54g_databases = {} 

55 

56 

57def get_database(path): 

58 path = os.path.abspath(path) 

59 if path not in g_databases: 

60 g_databases[path] = Database(path) 

61 

62 return g_databases[path] 

63 

64 

65def close_database(database): 

66 path = os.path.abspath(database._database_path) 

67 database._conn.close() 

68 if path in g_databases: 

69 del g_databases[path] 

70 

71 

72class Transaction(object): 

73 def __init__( 

74 self, conn, 

75 label='', 

76 mode='immediate', 

77 retry_interval=0.1, 

78 callback=None): 

79 

80 self.cursor = conn.cursor() 

81 assert mode in ('deferred', 'immediate', 'exclusive') 

82 self.mode = mode 

83 self.depth = 0 

84 self.rollback_wanted = False 

85 self.retry_interval = retry_interval 

86 self.callback = callback 

87 self.label = label 

88 self.started = False 

89 

90 def begin(self): 

91 if self.depth == 0: 

92 tries = 0 

93 while True: 

94 try: 

95 tries += 1 

96 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

97 self.started = True 

98 logger.debug( 

99 'Transaction started: %-30s (pid: %s, mode: %s)' 

100 % (self.label, os.getpid(), self.mode)) 

101 

102 self.total_changes_begin \ 

103 = self.cursor.connection.total_changes 

104 break 

105 

106 except sqlite3.OperationalError as e: 

107 if not str(e) == 'database is locked': 

108 raise 

109 

110 logger.info( 

111 'Database is locked retrying in %s s: %s ' 

112 '(pid: %s, tries: %i)' % ( 

113 self.retry_interval, self.label, 

114 os.getpid(), tries)) 

115 

116 time.sleep(self.retry_interval) 

117 

118 self.depth += 1 

119 

120 def commit(self): 

121 if not self.started: 

122 raise Exception( 

123 'Trying to commit without having started a transaction.') 

124 

125 self.depth -= 1 

126 if self.depth == 0: 

127 if not self.rollback_wanted: 

128 self.cursor.execute('COMMIT') 

129 self.started = False 

130 if self.total_changes_begin is not None: 

131 total_changes = self.cursor.connection.total_changes \ 

132 - self.total_changes_begin 

133 else: 

134 total_changes = None 

135 

136 if self.callback is not None and total_changes: 

137 self.callback('modified', total_changes) 

138 

139 logger.debug( 

140 'Transaction completed: %-30s ' 

141 '(pid: %s, changes: %i)' % ( 

142 self.label, os.getpid(), total_changes or 0)) 

143 

144 else: 

145 self.cursor.execute('ROLLBACK') 

146 self.started = False 

147 logger.warning('Deferred rollback executed.') 

148 logger.debug( 

149 'Transaction failed: %-30s (pid: %s)' % ( 

150 self.label, os.getpid())) 

151 self.rollback_wanted = False 

152 

153 def rollback(self): 

154 if not self.started: 

155 raise Exception( 

156 'Trying to rollback without having started a transaction.') 

157 

158 self.depth -= 1 

159 if self.depth == 0: 

160 self.cursor.execute('ROLLBACK') 

161 self.started = False 

162 

163 logger.debug( 

164 'Transaction failed: %-30s (pid: %s)' % ( 

165 self.label, os.getpid())) 

166 

167 self.rollback_wanted = False 

168 else: 

169 logger.warning('Deferred rollback scheduled.') 

170 self.rollback_wanted = True 

171 

172 def close(self): 

173 self.cursor.close() 

174 

175 def __enter__(self): 

176 self.begin() 

177 return self.cursor 

178 

179 def __exit__(self, exc_type, exc_value, traceback): 

180 if exc_type is None: 

181 self.commit() 

182 else: 

183 self.rollback() 

184 

185 if self.depth == 0: 

186 self.close() 

187 self.callback = None 

188 

189 

190class Database(object): 

191 ''' 

192 Shared meta-information database used by Squirrel. 

193 ''' 

194 

195 def __init__(self, database_path=':memory:', log_statements=False): 

196 self._database_path = database_path 

197 if database_path != ':memory:': 

198 util.ensuredirs(database_path) 

199 

200 try: 

201 logger.debug('Opening connection to database: %s' % database_path) 

202 self._conn = sqlite3.connect( 

203 database_path, 

204 isolation_level=None, 

205 check_same_thread=False if sqlite3.threadsafety else True) 

206 except sqlite3.OperationalError: 

207 raise error.SquirrelError( 

208 'Cannot connect to database: %s' % database_path) 

209 

210 self._conn.text_factory = str 

211 self._tables = {} 

212 

213 if log_statements: 

214 self._conn.set_trace_callback(self._log_statement) 

215 

216 self._listeners = [] 

217 self._initialize_db() 

218 self._basepath = None 

219 

220 self.version = None 

221 

222 def set_basepath(self, basepath): 

223 if basepath is not None: 

224 self._basepath = os.path.abspath(basepath) 

225 else: 

226 self._basepath = None 

227 

228 def relpath(self, path): 

229 if self._basepath is not None and path.startswith( 

230 self._basepath + os.path.sep): 

231 return path[len(self._basepath) + 1:] 

232 else: 

233 return path 

234 

235 def abspath(self, path): 

236 if self._basepath is not None and not path.startswith('virtual:') \ 

237 and not path.startswith('client:') \ 

238 and not os.path.isabs(path): 

239 return os.path.join(self._basepath, path) 

240 else: 

241 return path 

242 

243 def _log_statement(self, statement): 

244 logger.debug(statement) 

245 

246 def get_connection(self): 

247 return self._conn 

248 

249 def transaction(self, label='', mode='immediate'): 

250 return Transaction( 

251 self._conn, 

252 label=label, 

253 mode=mode, 

254 callback=self._notify_listeners) 

255 

256 def add_listener(self, listener): 

257 if isinstance(listener, types.MethodType): 

258 listener_ref = weakref.WeakMethod(listener) 

259 else: 

260 listener_ref = weakref.ref(listener) 

261 

262 self._listeners.append(listener_ref) 

263 return listener_ref 

264 

265 def remove_listener(self, listener_ref): 

266 self._listeners.remove(listener_ref) 

267 

268 def _notify_listeners(self, event, *args): 

269 dead = [] 

270 for listener_ref in self._listeners: 

271 listener = listener_ref() 

272 if listener is not None: 

273 listener(event, *args) 

274 else: 

275 dead.append(listener_ref) 

276 

277 for listener_ref in dead: 

278 self.remove_listener(listener_ref) 

279 

280 def _register_table(self, s): 

281 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

282 table_name = m.group(1) 

283 dtypes = m.group(2) 

284 table_header = [] 

285 for dele in dtypes.split(','): 

286 table_header.append(dele.split()[:2]) 

287 

288 self._tables[table_name] = table_header 

289 

290 return s 

291 

292 def _initialize_db(self): 

293 with self.transaction('initialize') as cursor: 

294 cursor.execute( 

295 '''PRAGMA recursive_triggers = true''') 

296 

297 cursor.execute( 

298 '''PRAGMA busy_timeout = 30000''') 

299 

300 if 2 == len(list( 

301 cursor.execute( 

302 ''' 

303 SELECT name FROM sqlite_master 

304 WHERE type = 'table' AND name IN ( 

305 'files', 

306 'persistent') 

307 '''))): 

308 

309 try: 

310 self.version = versiontuple(execute_get1( 

311 cursor, 

312 ''' 

313 SELECT value FROM settings 

314 WHERE key == "version" 

315 ''')[0]) 

316 except sqlite3.OperationalError: 

317 raise error.SquirrelError( 

318 'Squirrel database in pre-release format found: %s\n' 

319 'Please remove the database file and reindex.' 

320 % self._database_path) 

321 

322 if self.version >= (1, 1, 0): 

323 raise error.SquirrelError( 

324 'Squirrel database "%s" is of version %i.%i.%i which ' 

325 'is not supported by this version of Pyrocko. Please ' 

326 'upgrade the Pyrocko library.' 

327 % ((self._database_path, ) + self.version)) 

328 

329 return 

330 

331 cursor.execute(self._register_table( 

332 ''' 

333 CREATE TABLE IF NOT EXISTS settings ( 

334 key text PRIMARY KEY, 

335 value text) 

336 ''')) 

337 

338 cursor.execute( 

339 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")') 

340 

341 self.version = execute_get1( 

342 cursor, 

343 'SELECT value FROM settings WHERE key == "version"') 

344 

345 cursor.execute(self._register_table( 

346 ''' 

347 CREATE TABLE IF NOT EXISTS files ( 

348 file_id integer PRIMARY KEY, 

349 path text, 

350 format text, 

351 mtime float, 

352 size integer) 

353 ''')) 

354 

355 cursor.execute( 

356 ''' 

357 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

358 ON files (path) 

359 ''') 

360 

361 cursor.execute(self._register_table( 

362 ''' 

363 CREATE TABLE IF NOT EXISTS nuts ( 

364 nut_id integer PRIMARY KEY AUTOINCREMENT, 

365 file_id integer, 

366 file_segment integer, 

367 file_element integer, 

368 kind_id integer, 

369 kind_codes_id integer, 

370 tmin_seconds integer, 

371 tmin_offset integer, 

372 tmax_seconds integer, 

373 tmax_offset integer, 

374 kscale integer) 

375 ''')) 

376 

377 cursor.execute( 

378 ''' 

379 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

380 ON nuts (file_id, file_segment, file_element) 

381 ''') 

382 

383 cursor.execute(self._register_table( 

384 ''' 

385 CREATE TABLE IF NOT EXISTS kind_codes ( 

386 kind_codes_id integer PRIMARY KEY, 

387 kind_id integer, 

388 codes text, 

389 deltat float) 

390 ''')) 

391 

392 cursor.execute( 

393 ''' 

394 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

395 ON kind_codes (kind_id, codes, deltat) 

396 ''') 

397 

398 cursor.execute(self._register_table( 

399 ''' 

400 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

401 kind_codes_id integer PRIMARY KEY, 

402 count integer) 

403 ''')) 

404 

405 cursor.execute( 

406 ''' 

407 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

408 ON nuts (file_id) 

409 ''') 

410 

411 cursor.execute( 

412 ''' 

413 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

414 BEFORE DELETE ON files FOR EACH ROW 

415 BEGIN 

416 DELETE FROM nuts where file_id == old.file_id; 

417 END 

418 ''') 

419 

420 # trigger only on size to make silent update of mtime possible 

421 cursor.execute( 

422 ''' 

423 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

424 BEFORE UPDATE OF size ON files FOR EACH ROW 

425 BEGIN 

426 DELETE FROM nuts where file_id == old.file_id; 

427 END 

428 ''') 

429 

430 cursor.execute( 

431 ''' 

432 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

433 BEFORE INSERT ON nuts FOR EACH ROW 

434 BEGIN 

435 INSERT OR IGNORE INTO kind_codes_count 

436 VALUES (new.kind_codes_id, 0); 

437 UPDATE kind_codes_count 

438 SET count = count + 1 

439 WHERE new.kind_codes_id == kind_codes_id; 

440 END 

441 ''') 

442 

443 cursor.execute( 

444 ''' 

445 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

446 BEFORE DELETE ON nuts FOR EACH ROW 

447 BEGIN 

448 UPDATE kind_codes_count 

449 SET count = count - 1 

450 WHERE old.kind_codes_id == kind_codes_id; 

451 END 

452 ''') 

453 

454 cursor.execute(self._register_table( 

455 ''' 

456 CREATE TABLE IF NOT EXISTS persistent ( 

457 name text UNIQUE) 

458 ''')) 

459 

460 def dig(self, nuts, transaction=None): 

461 ''' 

462 Store or update content meta-information. 

463 

464 Given ``nuts`` are assumed to represent an up-to-date and complete 

465 inventory of a set of files. Any old information about these files is 

466 first pruned from the database (via database triggers). If such content 

467 is part of a live selection, it is also removed there. Then the new 

468 content meta-information is inserted into the main database. The 

469 content is not automatically inserted into the live selections again. 

470 It is in the responsibility of the selection object to perform this 

471 step. 

472 ''' 

473 

474 nuts = list(nuts) 

475 

476 if not nuts: 

477 return 

478 

479 files = set() 

480 kind_codes = set() 

481 for nut in nuts: 

482 files.add(( 

483 self.relpath(nut.file_path), 

484 nut.file_format, 

485 nut.file_mtime, 

486 nut.file_size)) 

487 kind_codes.add( 

488 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0)) 

489 

490 with (transaction or self.transaction('dig')) as c: 

491 

492 c.executemany( 

493 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

494 

495 c.executemany( 

496 '''UPDATE files SET 

497 format = ?, mtime = ?, size = ? 

498 WHERE path == ? 

499 ''', 

500 ((x[1], x[2], x[3], x[0]) for x in files)) 

501 

502 c.executemany( 

503 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

504 kind_codes) 

505 

506 c.executemany( 

507 ''' 

508 INSERT INTO nuts VALUES 

509 (NULL, ( 

510 SELECT file_id FROM files 

511 WHERE path == ? 

512 ),?,?,?, 

513 ( 

514 SELECT kind_codes_id FROM kind_codes 

515 WHERE kind_id == ? AND codes == ? AND deltat == ? 

516 ), ?,?,?,?,?) 

517 ''', 

518 ((self.relpath(nut.file_path), 

519 nut.file_segment, nut.file_element, 

520 nut.kind_id, 

521 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0, 

522 nut.tmin_seconds, nut.tmin_offset, 

523 nut.tmax_seconds, nut.tmax_offset, 

524 nut.kscale) for nut in nuts)) 

525 

526 def undig(self, path): 

527 

528 path = self.relpath(abspath(path)) 

529 

530 sql = ''' 

531 SELECT 

532 files.path, 

533 files.format, 

534 files.mtime, 

535 files.size, 

536 nuts.file_segment, 

537 nuts.file_element, 

538 kind_codes.kind_id, 

539 kind_codes.codes, 

540 nuts.tmin_seconds, 

541 nuts.tmin_offset, 

542 nuts.tmax_seconds, 

543 nuts.tmax_offset, 

544 kind_codes.deltat 

545 FROM files 

546 INNER JOIN nuts ON files.file_id = nuts.file_id 

547 INNER JOIN kind_codes 

548 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

549 WHERE path == ? 

550 ''' 

551 

552 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

553 for row in self._conn.execute(sql, (path,))] 

554 

555 def undig_all(self): 

556 sql = ''' 

557 SELECT 

558 files.path, 

559 files.format, 

560 files.mtime, 

561 files.size, 

562 nuts.file_segment, 

563 nuts.file_element, 

564 kind_codes.kind_id, 

565 kind_codes.codes, 

566 nuts.tmin_seconds, 

567 nuts.tmin_offset, 

568 nuts.tmax_seconds, 

569 nuts.tmax_offset, 

570 kind_codes.deltat 

571 FROM files 

572 INNER JOIN nuts ON files.file_id == nuts.file_id 

573 INNER JOIN kind_codes 

574 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

575 ''' 

576 

577 nuts = [] 

578 path = None 

579 for values in self._conn.execute(sql): 

580 if path is not None and values[0] != path: 

581 yield path, nuts 

582 nuts = [] 

583 

584 path = self.abspath(values[0]) 

585 

586 if values[1] is not None: 

587 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

588 

589 if path is not None: 

590 yield path, nuts 

591 

592 def undig_few(self, paths, format='detect'): 

593 for path in paths: 

594 nuts = self.undig(path) 

595 if nuts: 

596 yield (nuts[0].file_format, path), nuts 

597 else: 

598 yield (format, path), [] 

599 

600 def undig_many(self, paths, show_progress=True): 

601 selection = self.new_selection(paths, show_progress=show_progress) 

602 

603 for (_, path), nuts in selection.undig_grouped(): 

604 yield path, nuts 

605 

606 del selection 

607 

608 def new_selection(self, paths=None, format='detect', show_progress=True): 

609 from .selection import Selection 

610 selection = Selection(self) 

611 if paths: 

612 selection.add(paths, format=format, show_progress=show_progress) 

613 return selection 

614 

615 def undig_content(self, nut): 

616 return None 

617 

618 def remove(self, path): 

619 ''' 

620 Prune content meta-information about a given file. 

621 

622 All content pieces belonging to file ``path`` are removed from the 

623 main database and any attached live selections (via database triggers). 

624 ''' 

625 

626 path = self.relpath(abspath(path)) 

627 

628 with self.transaction('remove file') as cursor: 

629 cursor.execute( 

630 'DELETE FROM files WHERE path = ?', (path,)) 

631 

632 def remove_glob(self, pattern): 

633 ''' 

634 Prune content meta-information about files matching given pattern. 

635 

636 All content pieces belonging to files who's pathes match the given 

637 ``pattern`` are removed from the main database and any attached live 

638 selections (via database triggers). 

639 ''' 

640 

641 with self.transaction('remove file glob') as cursor: 

642 return cursor.execute( 

643 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

644 

645 def _remove_volatile(self): 

646 ''' 

647 Prune leftover volatile content from database. 

648 

649 If the cleanup handler of an attached selection is not called, e.g. due 

650 to a crash or terminated process, volatile content will not be removed 

651 properly. This method will delete such leftover entries. 

652 

653 This is a mainenance operatation which should only be called when no 

654 apps are using the database because it would remove volatile content 

655 currently used by the apps. 

656 ''' 

657 

658 with self.transaction('remove volatile') as cursor: 

659 return cursor.execute( 

660 ''' 

661 DELETE FROM files 

662 WHERE path LIKE 'virtual:volatile:%' 

663 ''').rowcount 

664 

665 def reset(self, path, transaction=None): 

666 ''' 

667 Prune information associated with a given file, but keep the file path. 

668 

669 This method is called when reading a file failed. File attributes, 

670 format, size and modification time are set to NULL. File content 

671 meta-information is removed from the database and any attached live 

672 selections (via database triggers). 

673 ''' 

674 

675 path = self.relpath(abspath(path)) 

676 

677 with (transaction or self.transaction('reset file')) as cursor: 

678 cursor.execute( 

679 ''' 

680 UPDATE files SET 

681 format = NULL, 

682 mtime = NULL, 

683 size = NULL 

684 WHERE path = ? 

685 ''', (path,)) 

686 

687 def silent_touch(self, path): 

688 ''' 

689 Update modification time of file without initiating reindexing. 

690 

691 Useful to prolong validity period of data with expiration date. 

692 ''' 

693 

694 apath = abspath(path) 

695 path = self.relpath(apath) 

696 

697 with self.transaction('silent touch') as cursor: 

698 

699 sql = 'SELECT format, size FROM files WHERE path = ?' 

700 fmt, size = execute_get1(cursor, sql, (path,)) 

701 

702 mod = io.get_backend(fmt) 

703 mod.touch(apath) 

704 file_stats = mod.get_stats(apath) 

705 

706 if file_stats[1] != size: 

707 raise FileLoadError( 

708 'Silent update for file "%s" failed: size has changed.' 

709 % apath) 

710 

711 sql = ''' 

712 UPDATE files 

713 SET mtime = ? 

714 WHERE path = ? 

715 ''' 

716 cursor.execute(sql, (file_stats[0], path)) 

717 

718 def _iter_codes_info( 

719 self, kind=None, codes=None, kind_codes_count='kind_codes_count'): 

720 

721 args = [] 

722 sel = '' 

723 if kind is not None: 

724 kind_id = to_kind_id(kind) 

725 

726 sel = 'AND kind_codes.kind_id == ?' 

727 args.append(to_kind_id(kind)) 

728 

729 if codes is not None: 

730 assert kind is not None # TODO supp by recursing possible kinds 

731 kind_id = to_kind_id(kind) 

732 pats = codes_patterns_for_kind(kind_id, codes) 

733 

734 if pats: 

735 # could optimize this by using IN for non-patterns 

736 sel += ' AND ( %s ) ' % ' OR '.join( 

737 ('kind_codes.codes GLOB ?',) * len(pats)) 

738 

739 args.extend(pat.safe_str for pat in pats) 

740 

741 sql = (''' 

742 SELECT 

743 kind_codes.kind_id, 

744 kind_codes.codes, 

745 kind_codes.deltat, 

746 kind_codes.kind_codes_id, 

747 %(kind_codes_count)s.count 

748 FROM %(kind_codes_count)s 

749 INNER JOIN kind_codes 

750 ON %(kind_codes_count)s.kind_codes_id 

751 == kind_codes.kind_codes_id 

752 WHERE %(kind_codes_count)s.count > 0 

753 ''' + sel + ''' 

754 ''') % {'kind_codes_count': kind_codes_count} 

755 

756 for kind_id, scodes, deltat, kcid, count in self._conn.execute( 

757 sql, args): 

758 

759 yield ( 

760 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count) 

761 

762 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

763 args = [] 

764 sel = '' 

765 if kind is not None: 

766 assert isinstance(kind, str) 

767 sel = 'AND kind_codes.kind_id == ?' 

768 args.append(to_kind_id(kind)) 

769 

770 sql = (''' 

771 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

772 INNER JOIN kind_codes 

773 ON %(kind_codes_count)s.kind_codes_id 

774 == kind_codes.kind_codes_id 

775 WHERE %(kind_codes_count)s.count > 0 

776 ''' + sel + ''' 

777 ORDER BY kind_codes.deltat 

778 ''') % {'kind_codes_count': kind_codes_count} 

779 

780 for row in self._conn.execute(sql, args): 

781 yield row[0] 

782 

783 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

784 args = [] 

785 sel = '' 

786 if kind is not None: 

787 assert isinstance(kind, str) 

788 sel = 'AND kind_codes.kind_id == ?' 

789 args.append(to_kind_id(kind)) 

790 

791 sql = (''' 

792 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes 

793 FROM %(kind_codes_count)s 

794 INNER JOIN kind_codes 

795 ON %(kind_codes_count)s.kind_codes_id 

796 == kind_codes.kind_codes_id 

797 WHERE %(kind_codes_count)s.count > 0 

798 ''' + sel + ''' 

799 ORDER BY kind_codes.codes 

800 

801 ''') % dict(kind_codes_count=kind_codes_count) 

802 

803 for row in self._conn.execute(sql, args): 

804 yield to_codes_simple(*row) 

805 

806 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

807 args = [] 

808 sel = '' 

809 if codes is not None: 

810 sel = 'AND kind_codes.codes == ?' 

811 args.append(codes.safe_str) 

812 

813 sql = (''' 

814 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

815 INNER JOIN kind_codes 

816 ON %(kind_codes_count)s.kind_codes_id 

817 == kind_codes.kind_codes_id 

818 WHERE %(kind_codes_count)s.count > 0 

819 ''' + sel + ''' 

820 ORDER BY kind_codes.kind_id 

821 ''') % {'kind_codes_count': kind_codes_count} 

822 

823 for row in self._conn.execute(sql, args): 

824 yield to_kind(row[0]) 

825 

826 def iter_paths(self): 

827 for row in self._conn.execute('''SELECT path FROM files'''): 

828 yield self.abspath(row[0]) 

829 

830 def iter_nnuts_by_file(self): 

831 sql = ''' 

832 SELECT 

833 path, 

834 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

835 FROM files 

836 ''' 

837 for row in self._conn.execute(sql): 

838 yield (self.abspath(row[0]),) + row[1:] 

839 

840 def iter_kinds(self, codes=None): 

841 return self._iter_kinds(codes=codes) 

842 

843 def iter_codes(self, kind=None): 

844 return self._iter_codes(kind=kind) 

845 

846 def get_paths(self): 

847 return list(self.iter_paths()) 

848 

849 def get_kinds(self, codes=None): 

850 return list(self.iter_kinds(codes=codes)) 

851 

852 def get_codes(self, kind=None): 

853 return list(self.iter_codes(kind=kind)) 

854 

855 def get_counts(self, kind=None): 

856 d = {} 

857 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

858 if kind_id not in d: 

859 v = d[kind_id] = {} 

860 else: 

861 v = d[kind_id] 

862 

863 if codes not in v: 

864 v[codes] = 0 

865 

866 v[codes] += count 

867 

868 if kind is not None: 

869 return d[to_kind_id(kind)] 

870 else: 

871 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

872 

873 def get_nfiles(self): 

874 sql = '''SELECT COUNT(*) FROM files''' 

875 for row in self._conn.execute(sql): 

876 return row[0] 

877 

878 def get_nnuts(self): 

879 sql = '''SELECT COUNT(*) FROM nuts''' 

880 for row in self._conn.execute(sql): 

881 return row[0] 

882 

883 def get_nnuts_by_file(self): 

884 return list(self.iter_nnuts_by_file()) 

885 

886 def get_total_size(self): 

887 sql = ''' 

888 SELECT SUM(files.size) FROM files 

889 ''' 

890 

891 for row in self._conn.execute(sql): 

892 return row[0] or 0 

893 

894 def get_persistent_names(self): 

895 sql = ''' 

896 SELECT name FROM persistent 

897 ''' 

898 return [row[0] for row in self._conn.execute(sql)] 

899 

900 def get_stats(self): 

901 return DatabaseStats( 

902 nfiles=self.get_nfiles(), 

903 nnuts=self.get_nnuts(), 

904 kinds=self.get_kinds(), 

905 codes=self.get_codes(), 

906 counts=self.get_counts(), 

907 total_size=self.get_total_size(), 

908 persistent=self.get_persistent_names()) 

909 

910 def __str__(self): 

911 return str(self.get_stats()) 

912 

913 def print_tables(self, stream=None): 

914 for table in [ 

915 'persistent', 

916 'files', 

917 'nuts', 

918 'kind_codes', 

919 'kind_codes_count']: 

920 

921 self.print_table(table, stream=stream) 

922 

923 def print_table(self, name, stream=None): 

924 

925 if stream is None: 

926 stream = sys.stdout 

927 

928 class hstr(str): 

929 def __repr__(self): 

930 return self 

931 

932 w = stream.write 

933 w('\n') 

934 w('\n') 

935 w(name) 

936 w('\n') 

937 sql = 'SELECT * FROM %s' % name 

938 tab = [] 

939 if name in self._tables: 

940 headers = self._tables[name] 

941 tab.append([None for _ in headers]) 

942 tab.append([hstr(x[0]) for x in headers]) 

943 tab.append([hstr(x[1]) for x in headers]) 

944 tab.append([None for _ in headers]) 

945 

946 for row in self._conn.execute(sql): 

947 tab.append([x for x in row]) 

948 

949 widths = [ 

950 max((len(repr(x)) if x is not None else 0) for x in col) 

951 for col in zip(*tab)] 

952 

953 for row in tab: 

954 w(' '.join( 

955 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

956 for (x, wid) in zip(row, widths))) 

957 

958 w('\n') 

959 

960 w('\n') 

961 

962 

963class DatabaseStats(Object): 

964 ''' 

965 Container to hold statistics about contents cached in meta-information db. 

966 ''' 

967 

968 nfiles = Int.T( 

969 help='Number of files in database.') 

970 nnuts = Int.T( 

971 help='Number of index nuts in database.') 

972 codes = List.T( 

973 Tuple.T(content_t=String.T()), 

974 help='Available code sequences in database, e.g. ' 

975 '(agency, network, station, location) for stations nuts.') 

976 kinds = List.T( 

977 String.T(), 

978 help='Available content types in database.') 

979 total_size = Int.T( 

980 help='Aggregated file size [bytes] of files referenced in database.') 

981 counts = Dict.T( 

982 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

983 help='Breakdown of how many nuts of any content type and code ' 

984 'sequence are available in database, ``counts[kind][codes]``.') 

985 persistent = List.T( 

986 String.T(), 

987 help='Names of persistent selections stored in database.') 

988 

989 def __str__(self): 

990 kind_counts = dict( 

991 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

992 

993 codes = [c.safe_str for c in self.codes] 

994 

995 if len(codes) > 20: 

996 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

997 + '\n [%i more]\n' % (len(codes) - 20) \ 

998 + util.ewrap(codes[-10:], indent=' ') 

999 else: 

1000 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

1001 if codes else '<none>' 

1002 

1003 s = ''' 

1004Available codes: %s 

1005Number of files: %i 

1006Total size of known files: %s 

1007Number of index nuts: %i 

1008Available content kinds: %s 

1009Persistent selections: %s''' % ( 

1010 scodes, 

1011 self.nfiles, 

1012 util.human_bytesize(self.total_size), 

1013 self.nnuts, 

1014 ', '.join('%s: %i' % ( 

1015 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

1016 ', '.join(self.persistent)) 

1017 

1018 return s 

1019 

1020 

1021__all__ = [ 

1022 'Database', 

1023 'DatabaseStats', 

1024]