Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/dataset/gshhg.py: 95%

269 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-01-15 12:05 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Interface to the `GSHHG <https://www.soest.hawaii.edu/pwessel/gshhg/>`_ 

8(coastlines, rivers and borders) database. 

9 

10The Global Self-consistent Hierarchical High-resolution Geography Database 

11(GSHHG) is a collection of polygons representing land, lakes, rivers and 

12political borders. 

13 

14If the database is not already available, it will be downloaded 

15automatically on first use. 

16 

17For more information about GSHHG, see 

18http://www.soest.hawaii.edu/pwessel/gshhg/. 

19 

20.. rubric:: Citation 

21 

22If you use this dataset, please cite 

23 

24Wessel, P., and W. H. F. Smith, A Global Self-consistent, Hierarchical, 

25High-resolution Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 

261996. 

27 

28''' 

29 

30import logging 

31import io 

32import struct 

33import time 

34import numpy as num 

35 

36from os import path 

37 

38from pyrocko import config, orthodrome 

39from .util import get_download_callback 

40 

41 

42logger = logging.getLogger('pyrocko.dataset.gshhg') 

43config = config.config() 

44 

45km = 1e3 

46micro_deg = 1e-6 

47 

48 

49def split_region_0_360(wesn): 

50 west, east, south, north = wesn 

51 if west < 0.: 

52 if east <= 0: 

53 return [(west+360., east+360., south, north)] 

54 else: 

55 return [(west+360., 360., south, north), 

56 (0., east, south, north)] 

57 else: 

58 return [wesn] 

59 

60 

61def is_valid_bounding_box(wesn): 

62 ''' 

63 Check if a given bounding box meets the GSHHG conventions. 

64 

65 :param wesn: bounding box as (west, east, south, north) in [deg] 

66 ''' 

67 

68 w, e, s, n = wesn 

69 

70 return ( 

71 w <= e 

72 and s <= n 

73 and -90.0 <= s <= 90. 

74 and -90. <= n <= 90. 

75 and -180. <= w < 360. 

76 and -180. <= e < 360.) 

77 

78 

79def is_valid_polygon(points): 

80 ''' 

81 Check if polygon points meet the GSHHG conventions. 

82 

83 :param points: Array of (lat, lon) pairs, shape (N, 2). 

84 ''' 

85 

86 lats = points[:, 0] 

87 lons = points[:, 1] 

88 

89 return ( 

90 num.all(-90. <= lats) 

91 and num.all(lats <= 90.) 

92 and num.all(-180. <= lons) 

93 and num.all(lons < 360.)) 

94 

95 

96def points_in_bounding_box(points, wesn, tolerance=0.1): 

97 ''' 

98 Check which points are contained in a given bounding box. 

99 

100 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

101 :param wesn: Region tuple (west, east, south, north) [deg] 

102 :param tolerance: increase the size of the test bounding box by 

103 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

104 bounding box). 

105 

106 :returns: Bool array of shape (N,). 

107 ''' 

108 points_wrap = points.copy() 

109 points_wrap[:, 1] %= 360. 

110 

111 mask = num.zeros(points_wrap.shape[0], dtype=bool) 

112 for w, e, s, n in split_region_0_360(wesn): 

113 mask = num.logical_or( 

114 mask, 

115 num.logical_and( 

116 num.logical_and( 

117 w-tolerance <= points_wrap[:, 1], 

118 points_wrap[:, 1] <= e+tolerance), 

119 num.logical_and( 

120 s-tolerance <= points_wrap[:, 0], 

121 points_wrap[:, 0] <= n+tolerance))) 

122 

123 return mask 

124 

125 

126def point_in_bounding_box(point, wesn, tolerance=0.1): 

127 ''' 

128 Check whether point is contained in a given bounding box. 

129 

130 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

131 :param wesn: Region tuple (west, east, south, north) [deg] 

132 :param tolerance: increase the size of the test bounding box by 

133 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

134 bounding box). 

135 

136 :rtype: bool 

137 ''' 

138 

139 lat, lon = point 

140 lon %= 360. 

141 for w, e, s, n in split_region_0_360(wesn): 

142 if (w-tolerance <= lon 

143 and lon <= e+tolerance 

144 and s-tolerance <= lat 

145 and lat <= n+tolerance): 

146 

147 return True 

148 

149 return False 

150 

151 

152def bounding_boxes_overlap(wesn1, wesn2): 

153 ''' 

154 Check whether two bounding boxes intersect. 

155 

156 :param wesn1: 

157 Region tuple ``(west, east, south, north)`` [deg] 

158 :type wesn1: 

159 :py:class:`tuple` of 4 :py:class:`float` 

160 

161 :param wesn2: 

162 Region tuple ``(west, east, south, north)`` [deg] 

163 :type wesn2: 

164 :py:class:`tuple` of 4 :py:class:`float` 

165 

166 :rtype: bool 

167 ''' 

168 for w1, e1, s1, n1 in split_region_0_360(wesn1): 

169 for w2, e2, s2, n2 in split_region_0_360(wesn2): 

170 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2: 

171 return True 

172 

173 return False 

174 

175 

176def is_polygon_in_bounding_box(points, wesn, tolerance=0.1): 

177 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance)) 

178 

179 

180def bounding_box_covering_points(points): 

181 lats = points[:, 0] 

182 lat_min, lat_max = num.min(lats), num.max(lats) 

183 

184 lons = points[:, 1] 

185 lons = lons % 360. 

186 lon_min, lon_max = num.min(lons), num.max(lons) 

187 if lon_max - lon_min < 180.: 

188 return lon_min, lon_max, lat_min, lat_max 

189 

190 lons = (lons - 180.) % 360. - 180. 

191 lon_min, lon_max = num.min(lons), num.max(lons) 

192 if lon_max - lon_min < 180.: 

193 return lon_min, lon_max, lat_min, lat_max 

194 

195 return (-180., 180., lat_min, lat_max) 

196 

197 

198LEVELS = { 

199 'rivers': [ 

200 'DOUBLE_LINED_RIVERS', 

201 'PERMANENT_MAJOR_RIVERS', 

202 'ADDITIONAL_MAJOR_RIVERS', 

203 'ADDITIONAL_RIVERS', 

204 'MINOR_RIVERS', 

205 'INTERMITTENT_RIVERS-MAJOR', 

206 'INTERMITTENT_RIVERS-ADDITIONAL', 

207 'INTERMITTENT_RIVERS-MINOR', 

208 'MAJOR_CANALS', 

209 'MINOR_CANALS', 

210 'IRRIGATION_CANALS'], 

211 'coastlines': [ 

212 'LAND', 

213 'LAKE', 

214 'ISLAND_IN_LAKE', 

215 'POND_IN_ISLAND_IN_LAKE', 

216 'ANTARCTIC_ICE_FRONT', 

217 'ANTARCTIC_GROUNDING_LINE'], 

218 'borders': [ 

219 'NATIONAL_BOUNDARIES', 

220 'STATE_BOUNDARIES_WITHIN_THE_AMERICAS', 

221 'MARINE_BOUNDARIES']} 

222 

223 

224class Polygon(object): 

225 ''' 

226 Representation of a GSHHG polygon. 

227 ''' 

228 

229 RIVER_NOT_SET = 0 

230 

231 SOURCE = ['CIA_WDBII', 'WVS', 'AC'] 

232 

233 def __init__(self, data_type, gshhg_file, offset, *attr): 

234 ''' 

235 Initialise a GSHHG polygon 

236 

237 :param gshhg_file: 

238 GSHHG binary file 

239 :type gshhg_file: 

240 file handle 

241 

242 :param offset: 

243 This polygon's offset in binary file 

244 :type offset: 

245 int 

246 

247 :param attr: 

248 Polygon attributes 

249 ``(pid, npoints, _flag, west, east, south, north, 

250 area, area_full, container, ancestor)``. 

251 See :file:`gshhg.h` for details. 

252 :type attr: 

253 tuple 

254 ''' 

255 self.data_type = data_type 

256 (self.pid, self.npoints, self._flag, 

257 self.west, self.east, self.south, self.north, 

258 self.area, self.area_full, self.container, self.ancestor) = attr 

259 

260 self.west *= micro_deg 

261 self.east *= micro_deg 

262 self.south *= micro_deg 

263 self.north *= micro_deg 

264 

265 self.level_no = (self._flag & 255) 

266 if self.data_type == 'rivers': 

267 # there seems to be a bug in the current version of GSHHG 

268 self.level_no = [ 

269 0, 1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, 10][self.level_no] 

270 else: 

271 self.level_no -= 1 

272 

273 self.version = (self._flag >> 8) & 255 

274 

275 cross = (self._flag >> 16) & 3 

276 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False 

277 self.dateline_crossed = True if cross == 2 or cross == 3 else False 

278 

279 self.source = self.SOURCE[(self._flag >> 24) & 1] 

280 if self.level_no >= 5: 

281 self.source = self.SOURCE[2] 

282 

283 self.river = (self._flag >> 25) & 1 

284 

285 scale = 10.**(self._flag >> 26) 

286 self.area /= scale 

287 self.area_full /= scale 

288 

289 self._points = None 

290 self._file = gshhg_file 

291 self._offset = offset 

292 

293 @property 

294 def level(self): 

295 return LEVELS[self.data_type][self.level_no] 

296 

297 @property 

298 def points(self): 

299 ''' 

300 Points of the polygon. 

301 

302 Array of (lat, lon) pairs, shape (N, 2). 

303 

304 :rtype: :class:`numpy.ndarray` 

305 ''' 

306 if self._points is None: 

307 with open(self._file) as db: 

308 db.seek(self._offset) 

309 self._points = num.fromfile( 

310 db, dtype='>i4', count=self.npoints*2)\ 

311 .astype(num.float32)\ 

312 .reshape(self.npoints, 2) 

313 

314 self._points = num.fliplr(self._points) 

315 if self.data_type == 'coastlines' and self.level_no in (1, 3): 

316 self._points = self._points[::-1, :] 

317 

318 self._points *= micro_deg 

319 

320 return self._points 

321 

322 @property 

323 def lats(self): 

324 return self.points[:, 0] 

325 

326 @property 

327 def lons(self): 

328 return self.points[:, 1] 

329 

330 def is_land(self): 

331 ''' 

332 Check if the polygon is land. 

333 

334 :rtype: bool 

335 ''' 

336 return self.level_no == 0 

337 

338 def is_lake(self): 

339 ''' 

340 Check if the polygon is a lake. 

341 

342 :rtype: bool 

343 ''' 

344 return self.level_no == 1 

345 

346 def is_island_in_lake(self): 

347 ''' 

348 Check if the polygon is an island in a lake. 

349 

350 :rtype: bool 

351 ''' 

352 return self.level_no == 2 

353 

354 def is_pond_in_island_in_lake(self): 

355 ''' 

356 Check if the polygon is pond on an island in a lake. 

357 

358 :rtype: bool 

359 ''' 

360 return self.level_no == 3 

361 

362 def is_antarctic_icefront(self): 

363 ''' 

364 Check if the polygon is antarctic icefront. 

365 

366 :rtype: bool 

367 ''' 

368 return self.level_no == 4 

369 

370 def is_antarctic_grounding_line(self): 

371 ''' 

372 Check if the polygon is antarctic grounding line. 

373 

374 :rtype: bool 

375 ''' 

376 return self.level_no == 5 

377 

378 def contains_point(self, point): 

379 ''' 

380 Check if point lies in polygon. 

381 

382 :param point: (lat, lon) [deg] 

383 :type point: tuple 

384 :rtype: bool 

385 

386 See :py:func:`pyrocko.orthodrome.contains_points`. 

387 ''' 

388 return bool( 

389 self.contains_points(num.asarray(point)[num.newaxis, :])[0]) 

390 

391 def contains_points(self, points): 

392 ''' 

393 Check if points lie in polygon. 

394 

395 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

396 :type points: :class:`numpy.ndarray` 

397 

398 See :py:func:`pyrocko.orthodrome.contains_points`. 

399 

400 :returns: Bool array of shape (N,) 

401 ''' 

402 mask = points_in_bounding_box(points, self.get_bounding_box()) 

403 if num.any(mask): 

404 mask[mask] = orthodrome.contains_points( 

405 self.points, points[mask, :]) 

406 

407 return mask 

408 

409 def get_bounding_box(self): 

410 return (self.west, self.east, self.south, self.north) 

411 

412 def __lt__(self, polygon): 

413 return self.level_no < polygon.level_no 

414 

415 def __str__(self): 

416 rstr = '''Polygon id: {p.pid} 

417------------------- 

418Points: {p.npoints} 

419Level: {p.level} 

420Area: {p.area} km**2 

421Area Full: {p.area_full} km**2 

422Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N 

423Source: {p.source} 

424Greenwhich crossed: {p.greenwhich_crossed} 

425Dateline crossed: {p.dateline_crossed} 

426 '''.format(p=self) 

427 return rstr 

428 

429 

430class GSHHGBase(object): 

431 ''' 

432 GSHHG database access. 

433 

434 This class provides methods to select relevant polygons (land, lakes, etc.) 

435 for given locations or regions. It also provides robust high-level 

436 functions to test if the Earth is dry or wet at given coordinates. 

437 ''' 

438 

439 data_type = None 

440 

441 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa 

442 _header_struct = struct.Struct('>IIIiiiiIIii') 

443 

444 def __init__(self, gshhg_file): 

445 ''' Initialise the database from GSHHG binary. 

446 

447 :param gshhg_file: Path to file 

448 :type gshhg_file: str 

449 : 

450 ''' 

451 t0 = time.time() 

452 self._file = gshhg_file 

453 

454 self.polygons = [] 

455 self._read_database() 

456 logger.debug('Initialised GSHHG database from %s in [%.4f s]' 

457 % (gshhg_file, time.time()-t0)) 

458 

459 def _read_database(self): 

460 with open(self._file, mode='rb') as db: 

461 while db: 

462 buf = db.read(self._header_struct.size) 

463 if not buf: 

464 break 

465 header = self._header_struct.unpack_from(buf) 

466 

467 p = Polygon( 

468 self.data_type, 

469 self._file, 

470 db.tell(), 

471 *header) 

472 

473 self.polygons.append(p) 

474 

475 offset = 8 * header[1] 

476 db.seek(offset, io.SEEK_CUR) 

477 

478 def load_all(self): 

479 data = num.fromfile(self._file, dtype='>i4').astype(num.float32) 

480 data *= micro_deg 

481 i = 11 

482 for polygon in self.polygons: 

483 if self.data_type == 'coastlines' and polygon.level_no in (1, 3): 

484 polygon._points = data[i+polygon.npoints*2-1:i-1:-1].reshape( 

485 (polygon.npoints, 2)) 

486 else: 

487 polygon._points = num.fliplr( 

488 data[i:i+polygon.npoints*2].reshape((polygon.npoints, 2))) 

489 

490 i += polygon.npoints * 2 + 11 

491 

492 assert i - 11 == data.size 

493 

494 @classmethod 

495 def _get_database(cls, filename): 

496 file = path.join(config.gshhg_dir, filename) 

497 if not path.exists(file): 

498 from pyrocko import util 

499 import zipfile 

500 

501 archive_path = path.join(config.gshhg_dir, 

502 path.basename(cls.gshhg_url)) 

503 util.download_file( 

504 cls.gshhg_url, archive_path, 

505 status_callback=get_download_callback( 

506 'Downloading GSHHG database...')) 

507 if not zipfile.is_zipfile(archive_path): 

508 raise util.DownloadError('GSHHG file is corrupted!') 

509 logger.info('Unzipping GSHHG database...') 

510 zipf = zipfile.ZipFile(archive_path) 

511 zipf.extractall(config.gshhg_dir) 

512 else: 

513 logger.debug('Using cached %s' % filename) 

514 return file 

515 

516 def get_polygons_at(self, lat, lon): 

517 ''' 

518 Get all polygons whose bounding boxes contain point. 

519 

520 :param lat: Latitude in [deg] 

521 :type lat: float 

522 :param lon: Longitude in [deg] 

523 :type lon: float 

524 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

525 :rtype: list 

526 ''' 

527 rp = [] 

528 for p in self.polygons: 

529 if point_in_bounding_box((lat, lon), p.get_bounding_box()): 

530 rp.append(p) 

531 return rp 

532 

533 def get_polygons_within(self, west, east, south, north): 

534 ''' 

535 Get all polygons whose bounding boxes intersect with a bounding box. 

536 

537 :param west: Western boundary in decimal degree 

538 :type west: float 

539 :param east: Eastern boundary in decimal degree 

540 :type east: float 

541 :param north: Northern boundary in decimal degree 

542 :type north: float 

543 :param south: Southern boundary in decimal degree 

544 :type south: float 

545 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

546 :rtype: list 

547 ''' 

548 

549 assert is_valid_bounding_box((west, east, south, north)) 

550 

551 rp = [] 

552 for p in self.polygons: 

553 if bounding_boxes_overlap( 

554 p.get_bounding_box(), (west, east, south, north)): 

555 

556 rp.append(p) 

557 return rp 

558 

559 

560class Coastlines(GSHHGBase): 

561 

562 data_type = 'coastlines' 

563 

564 def is_point_on_land(self, lat, lon): 

565 ''' 

566 Check whether a point is on land. 

567 

568 Lakes are considered not land. 

569 

570 :param lat: Latitude in [deg] 

571 :type lat: float 

572 :param lon: Longitude in [deg] 

573 :type lon: float 

574 

575 :rtype: bool 

576 ''' 

577 

578 relevant_polygons = self.get_polygons_at(lat, lon) 

579 relevant_polygons.sort() 

580 

581 land = False 

582 for p in relevant_polygons: 

583 if (p.is_land() or p.is_antarctic_grounding_line() or 

584 p.is_island_in_lake()): 

585 if p.contains_point((lat, lon)): 

586 land = True 

587 elif (p.is_lake() or p.is_antarctic_icefront() or 

588 p.is_pond_in_island_in_lake()): 

589 if p.contains_point((lat, lon)): 

590 land = False 

591 return land 

592 

593 def get_land_mask(self, points): 

594 ''' 

595 Check whether given points are on land. 

596 

597 Lakes are considered not land. 

598 

599 :param points: Array of (lat, lon) pairs, shape (N, 2). 

600 :type points: :class:`numpy.ndarray` 

601 :return: Boolean land mask 

602 :rtype: :class:`numpy.ndarray` of shape (N,) 

603 ''' 

604 

605 west, east, south, north = bounding_box_covering_points(points) 

606 

607 relevant_polygons = self.get_polygons_within(west, east, south, north) 

608 relevant_polygons.sort() 

609 

610 mask = num.zeros(points.shape[0], dtype=bool) 

611 for p in relevant_polygons: 

612 if (p.is_land() or p.is_antarctic_grounding_line() or 

613 p.is_island_in_lake()): 

614 land = p.contains_points(points) 

615 mask[land] = True 

616 elif p.is_lake() or p.is_pond_in_island_in_lake(): 

617 water = p.contains_points(points) 

618 mask[water] = False 

619 return mask 

620 

621 @classmethod 

622 def full(cls): 

623 ''' 

624 Return the full-resolution GSHHG database. 

625 ''' 

626 return cls(cls._get_database('gshhs_f.b')) 

627 

628 @classmethod 

629 def high(cls): 

630 ''' 

631 Return the high-resolution GSHHG database. 

632 ''' 

633 return cls(cls._get_database('gshhs_h.b')) 

634 

635 @classmethod 

636 def intermediate(cls): 

637 ''' 

638 Return the intermediate-resolution GSHHG database. 

639 ''' 

640 return cls(cls._get_database('gshhs_i.b')) 

641 

642 @classmethod 

643 def low(cls): 

644 ''' 

645 Return the low-resolution GSHHG database. 

646 ''' 

647 return cls(cls._get_database('gshhs_l.b')) 

648 

649 @classmethod 

650 def crude(cls): 

651 ''' 

652 Return the crude-resolution GSHHG database. 

653 ''' 

654 return cls(cls._get_database('gshhs_c.b')) 

655 

656 

657GSHHG = Coastlines # backwards compatibility 

658 

659 

660class Borders(GSHHGBase): 

661 

662 data_type = 'borders' 

663 

664 @classmethod 

665 def full(cls): 

666 ''' 

667 Return the full-resolution GSHHG database. 

668 ''' 

669 return cls(cls._get_database('wdb_borders_f.b')) 

670 

671 @classmethod 

672 def high(cls): 

673 ''' 

674 Return the high-resolution GSHHG database. 

675 ''' 

676 return cls(cls._get_database('wdb_borders_h.b')) 

677 

678 @classmethod 

679 def intermediate(cls): 

680 ''' 

681 Return the intermediate-resolution GSHHG database. 

682 ''' 

683 return cls(cls._get_database('wdb_borders_i.b')) 

684 

685 @classmethod 

686 def low(cls): 

687 ''' 

688 Return the low-resolution GSHHG database. 

689 ''' 

690 return cls(cls._get_database('wdb_borders_l.b')) 

691 

692 @classmethod 

693 def crude(cls): 

694 ''' 

695 Return the crude-resolution GSHHG database. 

696 ''' 

697 return cls(cls._get_database('wdb_borders_c.b')) 

698 

699 

700class Rivers(GSHHGBase): 

701 

702 data_type = 'rivers' 

703 

704 @classmethod 

705 def full(cls): 

706 ''' 

707 Return the full-resolution GSHHG database. 

708 ''' 

709 return cls(cls._get_database('wdb_rivers_f.b')) 

710 

711 @classmethod 

712 def high(cls): 

713 ''' 

714 Return the high-resolution GSHHG database. 

715 ''' 

716 return cls(cls._get_database('wdb_rivers_h.b')) 

717 

718 @classmethod 

719 def intermediate(cls): 

720 ''' 

721 Return the intermediate-resolution GSHHG database. 

722 ''' 

723 return cls(cls._get_database('wdb_rivers_i.b')) 

724 

725 @classmethod 

726 def low(cls): 

727 ''' 

728 Return the low-resolution GSHHG database. 

729 ''' 

730 return cls(cls._get_database('wdb_rivers_l.b')) 

731 

732 @classmethod 

733 def crude(cls): 

734 ''' 

735 Return the crude-resolution GSHHG database. 

736 ''' 

737 return cls(cls._get_database('wdb_rivers_c.b'))