1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8import logging 

9import sqlite3 

10import re 

11import time 

12import types 

13import weakref 

14 

15from pyrocko.io.io_common import FileLoadError 

16from pyrocko import util 

17from pyrocko.guts import Object, Int, List, Dict, Tuple, String 

18from . import error, io 

19from .model import Nut, to_kind_id, to_kind, to_codes_simple, \ 

20 codes_patterns_for_kind 

21from .error import SquirrelError 

22 

23logger = logging.getLogger('psq.database') 

24 

25guts_prefix = 'squirrel' 

26 

27 

28def abspath(path): 

29 if not path.startswith('virtual:') and not path.startswith('client:'): 

30 return os.path.abspath(path) 

31 else: 

32 return path 

33 

34 

35def versiontuple(s): 

36 fill = [0, 0, 0] 

37 vals = [int(x) for x in s.split('.')] 

38 fill[:len(vals)] = vals 

39 return tuple(fill) 

40 

41 

42class ExecuteGet1Error(SquirrelError): 

43 pass 

44 

45 

46def execute_get1(connection, sql, args=()): 

47 rows = list(connection.execute(sql, args)) 

48 if len(rows) == 1: 

49 return rows[0] 

50 else: 

51 raise ExecuteGet1Error('Expected database entry not found.') 

52 

53 

54g_databases = {} 

55 

56 

57def get_database(path): 

58 path = os.path.abspath(path) 

59 if path not in g_databases: 

60 g_databases[path] = Database(path) 

61 

62 return g_databases[path] 

63 

64 

65def close_database(database): 

66 path = os.path.abspath(database._database_path) 

67 database._conn.close() 

68 if path in g_databases: 

69 del g_databases[path] 

70 

71 

72class Transaction(object): 

73 def __init__( 

74 self, conn, 

75 label='', 

76 mode='immediate', 

77 retry_interval=0.1, 

78 callback=None): 

79 

80 self.cursor = conn.cursor() 

81 assert mode in ('deferred', 'immediate', 'exclusive') 

82 self.mode = mode 

83 self.depth = 0 

84 self.rollback_wanted = False 

85 self.retry_interval = retry_interval 

86 self.callback = callback 

87 self.label = label 

88 self.started = False 

89 

90 def begin(self): 

91 if self.depth == 0: 

92 tries = 0 

93 while True: 

94 try: 

95 tries += 1 

96 self.cursor.execute('BEGIN %s' % self.mode.upper()) 

97 self.started = True 

98 logger.debug( 

99 'Transaction started: %-30s (pid: %s, mode: %s)' 

100 % (self.label, os.getpid(), self.mode)) 

101 

102 self.total_changes_begin \ 

103 = self.cursor.connection.total_changes 

104 break 

105 

106 except sqlite3.OperationalError as e: 

107 if not str(e) == 'database is locked': 

108 raise 

109 

110 logger.info( 

111 'Database is locked retrying in %s s: %s ' 

112 '(pid: %s, tries: %i)' % ( 

113 self.retry_interval, self.label, 

114 os.getpid(), tries)) 

115 

116 time.sleep(self.retry_interval) 

117 

118 self.depth += 1 

119 

120 def commit(self): 

121 if not self.started: 

122 raise Exception( 

123 'Trying to commit without having started a transaction.') 

124 

125 self.depth -= 1 

126 if self.depth == 0: 

127 if not self.rollback_wanted: 

128 self.cursor.execute('COMMIT') 

129 self.started = False 

130 if self.total_changes_begin is not None: 

131 total_changes = self.cursor.connection.total_changes \ 

132 - self.total_changes_begin 

133 else: 

134 total_changes = None 

135 

136 if self.callback is not None and total_changes: 

137 self.callback('modified', total_changes) 

138 

139 logger.debug( 

140 'Transaction completed: %-30s ' 

141 '(pid: %s, changes: %i)' % ( 

142 self.label, os.getpid(), total_changes or 0)) 

143 

144 else: 

145 self.cursor.execute('ROLLBACK') 

146 self.started = False 

147 logger.warning('Deferred rollback executed.') 

148 logger.debug( 

149 'Transaction failed: %-30s (pid: %s)' % ( 

150 self.label, os.getpid())) 

151 self.rollback_wanted = False 

152 

153 def rollback(self): 

154 if not self.started: 

155 raise Exception( 

156 'Trying to rollback without having started a transaction.') 

157 

158 self.depth -= 1 

159 if self.depth == 0: 

160 self.cursor.execute('ROLLBACK') 

161 self.started = False 

162 

163 logger.debug( 

164 'Transaction failed: %-30s (pid: %s)' % ( 

165 self.label, os.getpid())) 

166 

167 self.rollback_wanted = False 

168 else: 

169 logger.warning('Deferred rollback scheduled.') 

170 self.rollback_wanted = True 

171 

172 def close(self): 

173 self.cursor.close() 

174 

175 def __enter__(self): 

176 self.begin() 

177 return self.cursor 

178 

179 def __exit__(self, exc_type, exc_value, traceback): 

180 if exc_type is None: 

181 self.commit() 

182 else: 

183 self.rollback() 

184 

185 if self.depth == 0: 

186 self.close() 

187 self.callback = None 

188 

189 

190class Database(object): 

191 ''' 

192 Shared meta-information database used by Squirrel. 

193 ''' 

194 

195 def __init__(self, database_path=':memory:', log_statements=False): 

196 self._database_path = database_path 

197 if database_path != ':memory:': 

198 util.ensuredirs(database_path) 

199 

200 try: 

201 logger.debug('Opening connection to database: %s' % database_path) 

202 self._conn = sqlite3.connect(database_path, isolation_level=None) 

203 except sqlite3.OperationalError: 

204 raise error.SquirrelError( 

205 'Cannot connect to database: %s' % database_path) 

206 

207 self._conn.text_factory = str 

208 self._tables = {} 

209 

210 if log_statements: 

211 self._conn.set_trace_callback(self._log_statement) 

212 

213 self._listeners = [] 

214 self._initialize_db() 

215 self._basepath = None 

216 

217 self.version = None 

218 

219 def set_basepath(self, basepath): 

220 if basepath is not None: 

221 self._basepath = os.path.abspath(basepath) 

222 else: 

223 self._basepath = None 

224 

225 def relpath(self, path): 

226 if self._basepath is not None and path.startswith( 

227 self._basepath + os.path.sep): 

228 return path[len(self._basepath) + 1:] 

229 else: 

230 return path 

231 

232 def abspath(self, path): 

233 if self._basepath is not None and not path.startswith('virtual:') \ 

234 and not path.startswith('client:') \ 

235 and not os.path.isabs(path): 

236 return os.path.join(self._basepath, path) 

237 else: 

238 return path 

239 

240 def _log_statement(self, statement): 

241 logger.debug(statement) 

242 

243 def get_connection(self): 

244 return self._conn 

245 

246 def transaction(self, label='', mode='immediate'): 

247 return Transaction( 

248 self._conn, 

249 label=label, 

250 mode=mode, 

251 callback=self._notify_listeners) 

252 

253 def add_listener(self, listener): 

254 if isinstance(listener, types.MethodType): 

255 listener_ref = weakref.WeakMethod(listener) 

256 else: 

257 listener_ref = weakref.ref(listener) 

258 

259 self._listeners.append(listener_ref) 

260 return listener_ref 

261 

262 def remove_listener(self, listener_ref): 

263 self._listeners.remove(listener_ref) 

264 

265 def _notify_listeners(self, event, *args): 

266 dead = [] 

267 for listener_ref in self._listeners: 

268 listener = listener_ref() 

269 if listener is not None: 

270 listener(event, *args) 

271 else: 

272 dead.append(listener_ref) 

273 

274 for listener_ref in dead: 

275 self.remove_listener(listener_ref) 

276 

277 def _register_table(self, s): 

278 m = re.search(r'(\S+)\s*\(([^)]+)\)', s) 

279 table_name = m.group(1) 

280 dtypes = m.group(2) 

281 table_header = [] 

282 for dele in dtypes.split(','): 

283 table_header.append(dele.split()[:2]) 

284 

285 self._tables[table_name] = table_header 

286 

287 return s 

288 

289 def _initialize_db(self): 

290 with self.transaction('initialize') as cursor: 

291 cursor.execute( 

292 '''PRAGMA recursive_triggers = true''') 

293 

294 cursor.execute( 

295 '''PRAGMA busy_timeout = 30000''') 

296 

297 if 2 == len(list( 

298 cursor.execute( 

299 ''' 

300 SELECT name FROM sqlite_master 

301 WHERE type = 'table' AND name IN ( 

302 'files', 

303 'persistent') 

304 '''))): 

305 

306 try: 

307 self.version = versiontuple(execute_get1( 

308 cursor, 

309 ''' 

310 SELECT value FROM settings 

311 WHERE key == "version" 

312 ''')[0]) 

313 except sqlite3.OperationalError: 

314 raise error.SquirrelError( 

315 'Squirrel database in pre-release format found: %s\n' 

316 'Please remove the database file and reindex.' 

317 % self._database_path) 

318 

319 if self.version >= (1, 1, 0): 

320 raise error.SquirrelError( 

321 'Squirrel database "%s" is of version %i.%i.%i which ' 

322 'is not supported by this version of Pyrocko. Please ' 

323 'upgrade the Pyrocko library.' 

324 % ((self._database_path, ) + self.version)) 

325 

326 return 

327 

328 cursor.execute(self._register_table( 

329 ''' 

330 CREATE TABLE IF NOT EXISTS settings ( 

331 key text PRIMARY KEY, 

332 value text) 

333 ''')) 

334 

335 cursor.execute( 

336 'INSERT OR IGNORE INTO settings VALUES ("version", "1.0")') 

337 

338 self.version = execute_get1( 

339 cursor, 

340 'SELECT value FROM settings WHERE key == "version"') 

341 

342 cursor.execute(self._register_table( 

343 ''' 

344 CREATE TABLE IF NOT EXISTS files ( 

345 file_id integer PRIMARY KEY, 

346 path text, 

347 format text, 

348 mtime float, 

349 size integer) 

350 ''')) 

351 

352 cursor.execute( 

353 ''' 

354 CREATE UNIQUE INDEX IF NOT EXISTS index_files_path 

355 ON files (path) 

356 ''') 

357 

358 cursor.execute(self._register_table( 

359 ''' 

360 CREATE TABLE IF NOT EXISTS nuts ( 

361 nut_id integer PRIMARY KEY AUTOINCREMENT, 

362 file_id integer, 

363 file_segment integer, 

364 file_element integer, 

365 kind_id integer, 

366 kind_codes_id integer, 

367 tmin_seconds integer, 

368 tmin_offset integer, 

369 tmax_seconds integer, 

370 tmax_offset integer, 

371 kscale integer) 

372 ''')) 

373 

374 cursor.execute( 

375 ''' 

376 CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element 

377 ON nuts (file_id, file_segment, file_element) 

378 ''') 

379 

380 cursor.execute(self._register_table( 

381 ''' 

382 CREATE TABLE IF NOT EXISTS kind_codes ( 

383 kind_codes_id integer PRIMARY KEY, 

384 kind_id integer, 

385 codes text, 

386 deltat float) 

387 ''')) 

388 

389 cursor.execute( 

390 ''' 

391 CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes 

392 ON kind_codes (kind_id, codes, deltat) 

393 ''') 

394 

395 cursor.execute(self._register_table( 

396 ''' 

397 CREATE TABLE IF NOT EXISTS kind_codes_count ( 

398 kind_codes_id integer PRIMARY KEY, 

399 count integer) 

400 ''')) 

401 

402 cursor.execute( 

403 ''' 

404 CREATE INDEX IF NOT EXISTS index_nuts_file_id 

405 ON nuts (file_id) 

406 ''') 

407 

408 cursor.execute( 

409 ''' 

410 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file 

411 BEFORE DELETE ON files FOR EACH ROW 

412 BEGIN 

413 DELETE FROM nuts where file_id == old.file_id; 

414 END 

415 ''') 

416 

417 # trigger only on size to make silent update of mtime possible 

418 cursor.execute( 

419 ''' 

420 CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file 

421 BEFORE UPDATE OF size ON files FOR EACH ROW 

422 BEGIN 

423 DELETE FROM nuts where file_id == old.file_id; 

424 END 

425 ''') 

426 

427 cursor.execute( 

428 ''' 

429 CREATE TRIGGER IF NOT EXISTS increment_kind_codes 

430 BEFORE INSERT ON nuts FOR EACH ROW 

431 BEGIN 

432 INSERT OR IGNORE INTO kind_codes_count 

433 VALUES (new.kind_codes_id, 0); 

434 UPDATE kind_codes_count 

435 SET count = count + 1 

436 WHERE new.kind_codes_id == kind_codes_id; 

437 END 

438 ''') 

439 

440 cursor.execute( 

441 ''' 

442 CREATE TRIGGER IF NOT EXISTS decrement_kind_codes 

443 BEFORE DELETE ON nuts FOR EACH ROW 

444 BEGIN 

445 UPDATE kind_codes_count 

446 SET count = count - 1 

447 WHERE old.kind_codes_id == kind_codes_id; 

448 END 

449 ''') 

450 

451 cursor.execute(self._register_table( 

452 ''' 

453 CREATE TABLE IF NOT EXISTS persistent ( 

454 name text UNIQUE) 

455 ''')) 

456 

457 def dig(self, nuts, transaction=None): 

458 ''' 

459 Store or update content meta-information. 

460 

461 Given ``nuts`` are assumed to represent an up-to-date and complete 

462 inventory of a set of files. Any old information about these files is 

463 first pruned from the database (via database triggers). If such content 

464 is part of a live selection, it is also removed there. Then the new 

465 content meta-information is inserted into the main database. The 

466 content is not automatically inserted into the live selections again. 

467 It is in the responsibility of the selection object to perform this 

468 step. 

469 ''' 

470 

471 nuts = list(nuts) 

472 

473 if not nuts: 

474 return 

475 

476 files = set() 

477 kind_codes = set() 

478 for nut in nuts: 

479 files.add(( 

480 self.relpath(nut.file_path), 

481 nut.file_format, 

482 nut.file_mtime, 

483 nut.file_size)) 

484 kind_codes.add( 

485 (nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0)) 

486 

487 with (transaction or self.transaction('dig')) as c: 

488 

489 c.executemany( 

490 'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files) 

491 

492 c.executemany( 

493 '''UPDATE files SET 

494 format = ?, mtime = ?, size = ? 

495 WHERE path == ? 

496 ''', 

497 ((x[1], x[2], x[3], x[0]) for x in files)) 

498 

499 c.executemany( 

500 'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', 

501 kind_codes) 

502 

503 c.executemany( 

504 ''' 

505 INSERT INTO nuts VALUES 

506 (NULL, ( 

507 SELECT file_id FROM files 

508 WHERE path == ? 

509 ),?,?,?, 

510 ( 

511 SELECT kind_codes_id FROM kind_codes 

512 WHERE kind_id == ? AND codes == ? AND deltat == ? 

513 ), ?,?,?,?,?) 

514 ''', 

515 ((self.relpath(nut.file_path), 

516 nut.file_segment, nut.file_element, 

517 nut.kind_id, 

518 nut.kind_id, nut.codes.safe_str, nut.deltat or 0.0, 

519 nut.tmin_seconds, nut.tmin_offset, 

520 nut.tmax_seconds, nut.tmax_offset, 

521 nut.kscale) for nut in nuts)) 

522 

523 def undig(self, path): 

524 

525 path = self.relpath(abspath(path)) 

526 

527 sql = ''' 

528 SELECT 

529 files.path, 

530 files.format, 

531 files.mtime, 

532 files.size, 

533 nuts.file_segment, 

534 nuts.file_element, 

535 kind_codes.kind_id, 

536 kind_codes.codes, 

537 nuts.tmin_seconds, 

538 nuts.tmin_offset, 

539 nuts.tmax_seconds, 

540 nuts.tmax_offset, 

541 kind_codes.deltat 

542 FROM files 

543 INNER JOIN nuts ON files.file_id = nuts.file_id 

544 INNER JOIN kind_codes 

545 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

546 WHERE path == ? 

547 ''' 

548 

549 return [Nut(values_nocheck=(self.abspath(row[0]),) + row[1:]) 

550 for row in self._conn.execute(sql, (path,))] 

551 

552 def undig_all(self): 

553 sql = ''' 

554 SELECT 

555 files.path, 

556 files.format, 

557 files.mtime, 

558 files.size, 

559 nuts.file_segment, 

560 nuts.file_element, 

561 kind_codes.kind_id, 

562 kind_codes.codes, 

563 nuts.tmin_seconds, 

564 nuts.tmin_offset, 

565 nuts.tmax_seconds, 

566 nuts.tmax_offset, 

567 kind_codes.deltat 

568 FROM files 

569 INNER JOIN nuts ON files.file_id == nuts.file_id 

570 INNER JOIN kind_codes 

571 ON nuts.kind_codes_id == kind_codes.kind_codes_id 

572 ''' 

573 

574 nuts = [] 

575 path = None 

576 for values in self._conn.execute(sql): 

577 if path is not None and values[0] != path: 

578 yield path, nuts 

579 nuts = [] 

580 

581 path = self.abspath(values[0]) 

582 

583 if values[1] is not None: 

584 nuts.append(Nut(values_nocheck=(path,) + values[1:])) 

585 

586 if path is not None: 

587 yield path, nuts 

588 

589 def undig_few(self, paths, format='detect'): 

590 for path in paths: 

591 nuts = self.undig(path) 

592 if nuts: 

593 yield (nuts[0].file_format, path), nuts 

594 else: 

595 yield (format, path), [] 

596 

597 def undig_many(self, paths, show_progress=True): 

598 selection = self.new_selection(paths, show_progress=show_progress) 

599 

600 for (_, path), nuts in selection.undig_grouped(): 

601 yield path, nuts 

602 

603 del selection 

604 

605 def new_selection(self, paths=None, format='detect', show_progress=True): 

606 from .selection import Selection 

607 selection = Selection(self) 

608 if paths: 

609 selection.add(paths, format=format, show_progress=show_progress) 

610 return selection 

611 

612 def undig_content(self, nut): 

613 return None 

614 

615 def remove(self, path): 

616 ''' 

617 Prune content meta-information about a given file. 

618 

619 All content pieces belonging to file ``path`` are removed from the 

620 main database and any attached live selections (via database triggers). 

621 ''' 

622 

623 path = self.relpath(abspath(path)) 

624 

625 with self.transaction('remove file') as cursor: 

626 cursor.execute( 

627 'DELETE FROM files WHERE path = ?', (path,)) 

628 

629 def remove_glob(self, pattern): 

630 ''' 

631 Prune content meta-information about files matching given pattern. 

632 

633 All content pieces belonging to files who's pathes match the given 

634 ``pattern`` are removed from the main database and any attached live 

635 selections (via database triggers). 

636 ''' 

637 

638 with self.transaction('remove file glob') as cursor: 

639 return cursor.execute( 

640 'DELETE FROM files WHERE path GLOB ?', (pattern,)).rowcount 

641 

642 def _remove_volatile(self): 

643 ''' 

644 Prune leftover volatile content from database. 

645 

646 If the cleanup handler of an attached selection is not called, e.g. due 

647 to a crash or terminated process, volatile content will not be removed 

648 properly. This method will delete such leftover entries. 

649 

650 This is a mainenance operatation which should only be called when no 

651 apps are using the database because it would remove volatile content 

652 currently used by the apps. 

653 ''' 

654 

655 with self.transaction('remove volatile') as cursor: 

656 return cursor.execute( 

657 ''' 

658 DELETE FROM files 

659 WHERE path LIKE 'virtual:volatile:%' 

660 ''').rowcount 

661 

662 def reset(self, path, transaction=None): 

663 ''' 

664 Prune information associated with a given file, but keep the file path. 

665 

666 This method is called when reading a file failed. File attributes, 

667 format, size and modification time are set to NULL. File content 

668 meta-information is removed from the database and any attached live 

669 selections (via database triggers). 

670 ''' 

671 

672 path = self.relpath(abspath(path)) 

673 

674 with (transaction or self.transaction('reset file')) as cursor: 

675 cursor.execute( 

676 ''' 

677 UPDATE files SET 

678 format = NULL, 

679 mtime = NULL, 

680 size = NULL 

681 WHERE path = ? 

682 ''', (path,)) 

683 

684 def silent_touch(self, path): 

685 ''' 

686 Update modification time of file without initiating reindexing. 

687 

688 Useful to prolong validity period of data with expiration date. 

689 ''' 

690 

691 apath = abspath(path) 

692 path = self.relpath(apath) 

693 

694 with self.transaction('silent touch') as cursor: 

695 

696 sql = 'SELECT format, size FROM files WHERE path = ?' 

697 fmt, size = execute_get1(cursor, sql, (path,)) 

698 

699 mod = io.get_backend(fmt) 

700 mod.touch(apath) 

701 file_stats = mod.get_stats(apath) 

702 

703 if file_stats[1] != size: 

704 raise FileLoadError( 

705 'Silent update for file "%s" failed: size has changed.' 

706 % apath) 

707 

708 sql = ''' 

709 UPDATE files 

710 SET mtime = ? 

711 WHERE path = ? 

712 ''' 

713 cursor.execute(sql, (file_stats[0], path)) 

714 

715 def _iter_codes_info( 

716 self, kind=None, codes=None, kind_codes_count='kind_codes_count'): 

717 

718 args = [] 

719 sel = '' 

720 if kind is not None: 

721 kind_id = to_kind_id(kind) 

722 

723 sel = 'AND kind_codes.kind_id == ?' 

724 args.append(to_kind_id(kind)) 

725 

726 if codes is not None: 

727 assert kind is not None # TODO supp by recursing possible kinds 

728 kind_id = to_kind_id(kind) 

729 pats = codes_patterns_for_kind(kind_id, codes) 

730 

731 if pats: 

732 # could optimize this by using IN for non-patterns 

733 sel += ' AND ( %s ) ' % ' OR '.join( 

734 ('kind_codes.codes GLOB ?',) * len(pats)) 

735 

736 args.extend(pat.safe_str for pat in pats) 

737 

738 sql = (''' 

739 SELECT 

740 kind_codes.kind_id, 

741 kind_codes.codes, 

742 kind_codes.deltat, 

743 kind_codes.kind_codes_id, 

744 %(kind_codes_count)s.count 

745 FROM %(kind_codes_count)s 

746 INNER JOIN kind_codes 

747 ON %(kind_codes_count)s.kind_codes_id 

748 == kind_codes.kind_codes_id 

749 WHERE %(kind_codes_count)s.count > 0 

750 ''' + sel + ''' 

751 ''') % {'kind_codes_count': kind_codes_count} 

752 

753 for kind_id, scodes, deltat, kcid, count in self._conn.execute( 

754 sql, args): 

755 

756 yield ( 

757 kind_id, to_codes_simple(kind_id, scodes), deltat, kcid, count) 

758 

759 def _iter_deltats(self, kind=None, kind_codes_count='kind_codes_count'): 

760 args = [] 

761 sel = '' 

762 if kind is not None: 

763 assert isinstance(kind, str) 

764 sel = 'AND kind_codes.kind_id == ?' 

765 args.append(to_kind_id(kind)) 

766 

767 sql = (''' 

768 SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s 

769 INNER JOIN kind_codes 

770 ON %(kind_codes_count)s.kind_codes_id 

771 == kind_codes.kind_codes_id 

772 WHERE %(kind_codes_count)s.count > 0 

773 ''' + sel + ''' 

774 ORDER BY kind_codes.deltat 

775 ''') % {'kind_codes_count': kind_codes_count} 

776 

777 for row in self._conn.execute(sql, args): 

778 yield row[0] 

779 

780 def _iter_codes(self, kind=None, kind_codes_count='kind_codes_count'): 

781 args = [] 

782 sel = '' 

783 if kind is not None: 

784 assert isinstance(kind, str) 

785 sel = 'AND kind_codes.kind_id == ?' 

786 args.append(to_kind_id(kind)) 

787 

788 sql = (''' 

789 SELECT DISTINCT kind_codes.kind_id, kind_codes.codes 

790 FROM %(kind_codes_count)s 

791 INNER JOIN kind_codes 

792 ON %(kind_codes_count)s.kind_codes_id 

793 == kind_codes.kind_codes_id 

794 WHERE %(kind_codes_count)s.count > 0 

795 ''' + sel + ''' 

796 ORDER BY kind_codes.codes 

797 

798 ''') % dict(kind_codes_count=kind_codes_count) 

799 

800 for row in self._conn.execute(sql, args): 

801 yield to_codes_simple(*row) 

802 

803 def _iter_kinds(self, codes=None, kind_codes_count='kind_codes_count'): 

804 args = [] 

805 sel = '' 

806 if codes is not None: 

807 sel = 'AND kind_codes.codes == ?' 

808 args.append(codes.safe_str) 

809 

810 sql = (''' 

811 SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s 

812 INNER JOIN kind_codes 

813 ON %(kind_codes_count)s.kind_codes_id 

814 == kind_codes.kind_codes_id 

815 WHERE %(kind_codes_count)s.count > 0 

816 ''' + sel + ''' 

817 ORDER BY kind_codes.kind_id 

818 ''') % {'kind_codes_count': kind_codes_count} 

819 

820 for row in self._conn.execute(sql, args): 

821 yield to_kind(row[0]) 

822 

823 def iter_paths(self): 

824 for row in self._conn.execute('''SELECT path FROM files'''): 

825 yield self.abspath(row[0]) 

826 

827 def iter_nnuts_by_file(self): 

828 sql = ''' 

829 SELECT 

830 path, 

831 (SELECT COUNT(*) FROM nuts WHERE nuts.file_id = files.file_id) 

832 FROM files 

833 ''' 

834 for row in self._conn.execute(sql): 

835 yield (self.abspath(row[0]),) + row[1:] 

836 

837 def iter_kinds(self, codes=None): 

838 return self._iter_kinds(codes=codes) 

839 

840 def iter_codes(self, kind=None): 

841 return self._iter_codes(kind=kind) 

842 

843 def get_paths(self): 

844 return list(self.iter_paths()) 

845 

846 def get_kinds(self, codes=None): 

847 return list(self.iter_kinds(codes=codes)) 

848 

849 def get_codes(self, kind=None): 

850 return list(self.iter_codes(kind=kind)) 

851 

852 def get_counts(self, kind=None): 

853 d = {} 

854 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

855 if kind_id not in d: 

856 v = d[kind_id] = {} 

857 else: 

858 v = d[kind_id] 

859 

860 if codes not in v: 

861 v[codes] = 0 

862 

863 v[codes] += count 

864 

865 if kind is not None: 

866 return d[to_kind_id(kind)] 

867 else: 

868 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

869 

870 def get_nfiles(self): 

871 sql = '''SELECT COUNT(*) FROM files''' 

872 for row in self._conn.execute(sql): 

873 return row[0] 

874 

875 def get_nnuts(self): 

876 sql = '''SELECT COUNT(*) FROM nuts''' 

877 for row in self._conn.execute(sql): 

878 return row[0] 

879 

880 def get_nnuts_by_file(self): 

881 return list(self.iter_nnuts_by_file()) 

882 

883 def get_total_size(self): 

884 sql = ''' 

885 SELECT SUM(files.size) FROM files 

886 ''' 

887 

888 for row in self._conn.execute(sql): 

889 return row[0] or 0 

890 

891 def get_persistent_names(self): 

892 sql = ''' 

893 SELECT name FROM persistent 

894 ''' 

895 return [row[0] for row in self._conn.execute(sql)] 

896 

897 def get_stats(self): 

898 return DatabaseStats( 

899 nfiles=self.get_nfiles(), 

900 nnuts=self.get_nnuts(), 

901 kinds=self.get_kinds(), 

902 codes=self.get_codes(), 

903 counts=self.get_counts(), 

904 total_size=self.get_total_size(), 

905 persistent=self.get_persistent_names()) 

906 

907 def __str__(self): 

908 return str(self.get_stats()) 

909 

910 def print_tables(self, stream=None): 

911 for table in [ 

912 'persistent', 

913 'files', 

914 'nuts', 

915 'kind_codes', 

916 'kind_codes_count']: 

917 

918 self.print_table(table, stream=stream) 

919 

920 def print_table(self, name, stream=None): 

921 

922 if stream is None: 

923 stream = sys.stdout 

924 

925 class hstr(str): 

926 def __repr__(self): 

927 return self 

928 

929 w = stream.write 

930 w('\n') 

931 w('\n') 

932 w(name) 

933 w('\n') 

934 sql = 'SELECT * FROM %s' % name 

935 tab = [] 

936 if name in self._tables: 

937 headers = self._tables[name] 

938 tab.append([None for _ in headers]) 

939 tab.append([hstr(x[0]) for x in headers]) 

940 tab.append([hstr(x[1]) for x in headers]) 

941 tab.append([None for _ in headers]) 

942 

943 for row in self._conn.execute(sql): 

944 tab.append([x for x in row]) 

945 

946 widths = [ 

947 max((len(repr(x)) if x is not None else 0) for x in col) 

948 for col in zip(*tab)] 

949 

950 for row in tab: 

951 w(' '.join( 

952 (repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) 

953 for (x, wid) in zip(row, widths))) 

954 

955 w('\n') 

956 

957 w('\n') 

958 

959 

960class DatabaseStats(Object): 

961 ''' 

962 Container to hold statistics about contents cached in meta-information db. 

963 ''' 

964 

965 nfiles = Int.T( 

966 help='Number of files in database.') 

967 nnuts = Int.T( 

968 help='Number of index nuts in database.') 

969 codes = List.T( 

970 Tuple.T(content_t=String.T()), 

971 help='Available code sequences in database, e.g. ' 

972 '(agency, network, station, location) for stations nuts.') 

973 kinds = List.T( 

974 String.T(), 

975 help='Available content types in database.') 

976 total_size = Int.T( 

977 help='Aggregated file size [bytes] of files referenced in database.') 

978 counts = Dict.T( 

979 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

980 help='Breakdown of how many nuts of any content type and code ' 

981 'sequence are available in database, ``counts[kind][codes]``.') 

982 persistent = List.T( 

983 String.T(), 

984 help='Names of persistent selections stored in database.') 

985 

986 def __str__(self): 

987 kind_counts = dict( 

988 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

989 

990 codes = [c.safe_str for c in self.codes] 

991 

992 if len(codes) > 20: 

993 scodes = '\n' + util.ewrap(codes[:10], indent=' ') \ 

994 + '\n [%i more]\n' % (len(codes) - 20) \ 

995 + util.ewrap(codes[-10:], indent=' ') 

996 else: 

997 scodes = '\n' + util.ewrap(codes, indent=' ') \ 

998 if codes else '<none>' 

999 

1000 s = ''' 

1001Available codes: %s 

1002Number of files: %i 

1003Total size of known files: %s 

1004Number of index nuts: %i 

1005Available content kinds: %s 

1006Persistent selections: %s''' % ( 

1007 scodes, 

1008 self.nfiles, 

1009 util.human_bytesize(self.total_size), 

1010 self.nnuts, 

1011 ', '.join('%s: %i' % ( 

1012 kind, kind_counts[kind]) for kind in sorted(self.kinds)), 

1013 ', '.join(self.persistent)) 

1014 

1015 return s 

1016 

1017 

1018__all__ = [ 

1019 'Database', 

1020 'DatabaseStats', 

1021]