1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Interface to the GSHHG (coastlines, rivers and borders) database. 

8 

9The Global Self-consistent Hierarchical High-resolution Geography Database 

10(GSHHG) is a collection of polygons representing land, lakes, rivers and 

11political borders. 

12 

13If the database is not already available, it will be downloaded 

14automatically on first use. 

15 

16For more information about GSHHG, see 

17http://www.soest.hawaii.edu/pwessel/gshhg/. 

18 

19.. note:: 

20 

21 **If you use this dataset, please cite:** 

22 

23 Wessel, P., and W. H. F. 

24 Smith, A Global Self-consistent, Hierarchical, High-resolution 

25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996. 

26''' 

27 

28import logging 

29import io 

30import struct 

31import time 

32import numpy as num 

33 

34from os import path 

35 

36from pyrocko import config, orthodrome 

37from .util import get_download_callback 

38 

39 

40logger = logging.getLogger('pyrocko.dataset.gshhg') 

41config = config.config() 

42 

43km = 1e3 

44micro_deg = 1e-6 

45 

46 

47def split_region_0_360(wesn): 

48 west, east, south, north = wesn 

49 if west < 0.: 

50 if east <= 0: 

51 return [(west+360., east+360., south, north)] 

52 else: 

53 return [(west+360., 360., south, north), 

54 (0., east, south, north)] 

55 else: 

56 return [wesn] 

57 

58 

59def is_valid_bounding_box(wesn): 

60 ''' 

61 Check if a given bounding box meets the GSHHG conventions. 

62 

63 :param wesn: bounding box as (west, east, south, north) in [deg] 

64 ''' 

65 

66 w, e, s, n = wesn 

67 

68 return ( 

69 w <= e 

70 and s <= n 

71 and -90.0 <= s <= 90. 

72 and -90. <= n <= 90. 

73 and -180. <= w < 360. 

74 and -180. <= e < 360.) 

75 

76 

77def is_valid_polygon(points): 

78 ''' 

79 Check if polygon points meet the GSHHG conventions. 

80 

81 :param points: Array of (lat, lon) pairs, shape (N, 2). 

82 ''' 

83 

84 lats = points[:, 0] 

85 lons = points[:, 1] 

86 

87 return ( 

88 num.all(-90. <= lats) 

89 and num.all(lats <= 90.) 

90 and num.all(-180. <= lons) 

91 and num.all(lons < 360.)) 

92 

93 

94def points_in_bounding_box(points, wesn, tolerance=0.1): 

95 ''' 

96 Check which points are contained in a given bounding box. 

97 

98 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

99 :param wesn: Region tuple (west, east, south, north) [deg] 

100 :param tolerance: increase the size of the test bounding box by 

101 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

102 bounding box). 

103 

104 :returns: Bool array of shape (N,). 

105 ''' 

106 points_wrap = points.copy() 

107 points_wrap[:, 1] %= 360. 

108 

109 mask = num.zeros(points_wrap.shape[0], dtype=bool) 

110 for w, e, s, n in split_region_0_360(wesn): 

111 mask = num.logical_or( 

112 mask, 

113 num.logical_and( 

114 num.logical_and( 

115 w-tolerance <= points_wrap[:, 1], 

116 points_wrap[:, 1] <= e+tolerance), 

117 num.logical_and( 

118 s-tolerance <= points_wrap[:, 0], 

119 points_wrap[:, 0] <= n+tolerance))) 

120 

121 return mask 

122 

123 

124def point_in_bounding_box(point, wesn, tolerance=0.1): 

125 ''' 

126 Check whether point is contained in a given bounding box. 

127 

128 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

129 :param wesn: Region tuple (west, east, south, north) [deg] 

130 :param tolerance: increase the size of the test bounding box by 

131 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

132 bounding box). 

133 

134 :rtype: bool 

135 ''' 

136 

137 lat, lon = point 

138 lon %= 360. 

139 for w, e, s, n in split_region_0_360(wesn): 

140 if (w-tolerance <= lon 

141 and lon <= e+tolerance 

142 and s-tolerance <= lat 

143 and lat <= n+tolerance): 

144 

145 return True 

146 

147 return False 

148 

149 

150def bounding_boxes_overlap(wesn1, wesn2): 

151 ''' 

152 Check whether two bounding boxes intersect. 

153 

154 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg] 

155 

156 :rtype: bool 

157 ''' 

158 for w1, e1, s1, n1 in split_region_0_360(wesn1): 

159 for w2, e2, s2, n2 in split_region_0_360(wesn2): 

160 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2: 

161 return True 

162 

163 return False 

164 

165 

166def is_polygon_in_bounding_box(points, wesn, tolerance=0.1): 

167 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance)) 

168 

169 

170def bounding_box_covering_points(points): 

171 lats = points[:, 0] 

172 lat_min, lat_max = num.min(lats), num.max(lats) 

173 

174 lons = points[:, 1] 

175 lons = lons % 360. 

176 lon_min, lon_max = num.min(lons), num.max(lons) 

177 if lon_max - lon_min < 180.: 

178 return lon_min, lon_max, lat_min, lat_max 

179 

180 lons = (lons - 180.) % 360. - 180. 

181 lon_min, lon_max = num.min(lons), num.max(lons) 

182 if lon_max - lon_min < 180.: 

183 return lon_min, lon_max, lat_min, lat_max 

184 

185 return (-180., 180., lat_min, lat_max) 

186 

187 

188class Polygon(object): 

189 ''' 

190 Representation of a GSHHG polygon. 

191 ''' 

192 

193 RIVER_NOT_SET = 0 

194 

195 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE', 

196 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE'] 

197 

198 SOURCE = ['CIA_WDBII', 'WVS', 'AC'] 

199 

200 def __init__(self, gshhg_file, offset, *attr): 

201 ''' 

202 Initialise a GSHHG polygon 

203 

204 :param gshhg_file: GSHHG binary file 

205 :type gshhg_file: str 

206 :param offset: This polygon's offset in binary file 

207 :type offset: int 

208 :param attr: Polygon attributes 

209 ``(pid, npoints, _flag, west, east, south, north, 

210 area, area_full, container, ancestor)``. 

211 See :file:`gshhg.h` for details. 

212 :type attr: tuple 

213 ''' 

214 (self.pid, self.npoints, self._flag, 

215 self.west, self.east, self.south, self.north, 

216 self.area, self.area_full, self.container, self.ancestor) = attr 

217 

218 self.west *= micro_deg 

219 self.east *= micro_deg 

220 self.south *= micro_deg 

221 self.north *= micro_deg 

222 

223 self.level_no = (self._flag & 255) 

224 self.level = self.LEVELS[self.level_no - 1] 

225 self.version = (self._flag >> 8) & 255 

226 

227 cross = (self._flag >> 16) & 3 

228 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False 

229 self.dateline_crossed = True if cross == 2 or cross == 3 else False 

230 

231 self.source = self.SOURCE[(self._flag >> 24) & 1] 

232 if self.level_no >= 5: 

233 self.source = self.SOURCE[2] 

234 

235 self.river = (self._flag >> 25) & 1 

236 

237 scale = 10.**(self._flag >> 26) 

238 self.area /= scale 

239 self.area_full /= scale 

240 

241 self._points = None 

242 self._file = gshhg_file 

243 self._offset = offset 

244 

245 @property 

246 def points(self): 

247 ''' 

248 Points of the polygon. 

249 

250 Array of (lat, lon) pairs, shape (N, 2). 

251 

252 :rtype: :class:`numpy.ndarray` 

253 ''' 

254 if self._points is None: 

255 with open(self._file) as db: 

256 db.seek(self._offset) 

257 self._points = num.fromfile( 

258 db, dtype='>i4', count=self.npoints*2)\ 

259 .astype(num.float32)\ 

260 .reshape(self.npoints, 2) 

261 

262 self._points = num.fliplr(self._points) 

263 if self.level_no in (2, 4): 

264 self._points = self._points[::-1, :] 

265 

266 self._points *= micro_deg 

267 return self._points 

268 

269 @property 

270 def lats(self): 

271 return self.points[:, 0] 

272 

273 @property 

274 def lons(self): 

275 return self.points[:, 1] 

276 

277 def _is_level(self, level): 

278 if self.level is self.LEVELS[level]: 

279 return True 

280 return False 

281 

282 def is_land(self): 

283 ''' 

284 Check if the polygon is land. 

285 

286 :rtype: bool 

287 ''' 

288 return self._is_level(0) 

289 

290 def is_lake(self): 

291 ''' 

292 Check if the polygon is a lake. 

293 

294 :rtype: bool 

295 ''' 

296 return self._is_level(1) 

297 

298 def is_island_in_lake(self): 

299 ''' 

300 Check if the polygon is an island in a lake. 

301 

302 :rtype: bool 

303 ''' 

304 return self._is_level(2) 

305 

306 def is_pond_in_island_in_lake(self): 

307 ''' 

308 Check if the polygon is pond on an island in a lake. 

309 

310 :rtype: bool 

311 ''' 

312 return self._is_level(3) 

313 

314 def is_antarctic_icefront(self): 

315 ''' 

316 Check if the polygon is antarctic icefront. 

317 

318 :rtype: bool 

319 ''' 

320 return self._is_level(4) 

321 

322 def is_antarctic_grounding_line(self): 

323 ''' 

324 Check if the polygon is antarctic grounding line. 

325 

326 :rtype: bool 

327 ''' 

328 return self._is_level(5) 

329 

330 def contains_point(self, point): 

331 ''' 

332 Check if point lies in polygon. 

333 

334 :param point: (lat, lon) [deg] 

335 :type point: tuple 

336 :rtype: bool 

337 

338 See :py:func:`pyrocko.orthodrome.contains_points`. 

339 ''' 

340 return bool( 

341 self.contains_points(num.asarray(point)[num.newaxis, :])[0]) 

342 

343 def contains_points(self, points): 

344 ''' 

345 Check if points lie in polygon. 

346 

347 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

348 :type points: :class:`numpy.ndarray` 

349 

350 See :py:func:`pyrocko.orthodrome.contains_points`. 

351 

352 :returns: Bool array of shape (N,) 

353 ''' 

354 mask = points_in_bounding_box(points, self.get_bounding_box()) 

355 if num.any(mask): 

356 mask[mask] = orthodrome.contains_points( 

357 self.points, points[mask, :]) 

358 

359 return mask 

360 

361 def get_bounding_box(self): 

362 return (self.west, self.east, self.south, self.north) 

363 

364 def __lt__(self, polygon): 

365 return self.level_no < polygon.level_no 

366 

367 def __str__(self): 

368 rstr = '''Polygon id: {p.pid} 

369------------------- 

370Points: {p.npoints} 

371Level: {p.level} 

372Area: {p.area} km**2 

373Area Full: {p.area_full} km**2 

374Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N 

375Source: {p.source} 

376Greenwhich crossed: {p.greenwhich_crossed} 

377Dateline crossed: {p.dateline_crossed} 

378 '''.format(p=self) 

379 return rstr 

380 

381 

382class RiverPolygon(Polygon): 

383 

384 LEVELS = [ 

385 'DOUBLE_LINED_RIVERS', 

386 'PERMANENT_MAJOR_RIVERS', 

387 'ADDITIONAL_MAJOR_RIVERS', 

388 'ADDITIONAL_RIVERS', 

389 'MINOR_RIVERS', 

390 'INTERMITTENT_RIVERS-MAJOR', 

391 'INTERMITTENT_RIVERS-ADDITIONAL', 

392 'INTERMITTENT_RIVERS-MINOR', 

393 'MAJOR_CANALS', 

394 'MINOR_CANALS', 

395 'IRRIGATION_CANALS', 

396 'DOCS_MISS', 

397 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS'] 

398 

399 

400class GSHHGBase(object): 

401 ''' 

402 GSHHG database access. 

403 

404 This class provides methods to select relevant polygons (land, lakes, etc.) 

405 for given locations or regions. It also provides robust high-level 

406 functions to test if the Earth is dry or wet at given coordinates. 

407 ''' 

408 

409 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa 

410 _header_struct = struct.Struct('>IIIiiiiIIii') 

411 

412 def __init__(self, gshhg_file): 

413 ''' Initialise the database from GSHHG binary. 

414 

415 :param gshhg_file: Path to file 

416 :type gshhg_file: str 

417 : 

418 ''' 

419 t0 = time.time() 

420 self._file = gshhg_file 

421 

422 self.polygons = [] 

423 self._read_database() 

424 logger.debug('Initialised GSHHG database from %s in [%.4f s]' 

425 % (gshhg_file, time.time()-t0)) 

426 

427 def _read_database(self): 

428 with open(self._file, mode='rb') as db: 

429 while db: 

430 buf = db.read(self._header_struct.size) 

431 if not buf: 

432 break 

433 header = self._header_struct.unpack_from(buf) 

434 dataset = path.basename(self._file).split("_")[1] 

435 

436 if dataset == 'rivers': 

437 LoadPolygon = RiverPolygon 

438 else: 

439 LoadPolygon = Polygon 

440 

441 p = LoadPolygon( 

442 self._file, 

443 db.tell(), 

444 *header) 

445 self.polygons.append(p) 

446 

447 offset = 8 * header[1] 

448 db.seek(offset, io.SEEK_CUR) 

449 

450 @classmethod 

451 def _get_database(cls, filename): 

452 file = path.join(config.gshhg_dir, filename) 

453 if not path.exists(file): 

454 from pyrocko import util 

455 import zipfile 

456 

457 archive_path = path.join(config.gshhg_dir, 

458 path.basename(cls.gshhg_url)) 

459 util.download_file( 

460 cls.gshhg_url, archive_path, 

461 status_callback=get_download_callback( 

462 'Downloading GSHHG database...')) 

463 if not zipfile.is_zipfile(archive_path): 

464 raise util.DownloadError('GSHHG file is corrupted!') 

465 logger.info('Unzipping GSHHG database...') 

466 zipf = zipfile.ZipFile(archive_path) 

467 zipf.extractall(config.gshhg_dir) 

468 else: 

469 logger.debug('Using cached %s' % filename) 

470 return file 

471 

472 def get_polygons_at(self, lat, lon): 

473 ''' 

474 Get all polygons whose bounding boxes contain point. 

475 

476 :param lat: Latitude in [deg] 

477 :type lat: float 

478 :param lon: Longitude in [deg] 

479 :type lon: float 

480 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

481 :rtype: list 

482 ''' 

483 rp = [] 

484 for p in self.polygons: 

485 if point_in_bounding_box((lat, lon), p.get_bounding_box()): 

486 rp.append(p) 

487 return rp 

488 

489 def get_polygons_within(self, west, east, south, north): 

490 ''' 

491 Get all polygons whose bounding boxes intersect with a bounding box. 

492 

493 :param west: Western boundary in decimal degree 

494 :type west: float 

495 :param east: Eastern boundary in decimal degree 

496 :type east: float 

497 :param north: Northern boundary in decimal degree 

498 :type north: float 

499 :param south: Southern boundary in decimal degree 

500 :type south: float 

501 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

502 :rtype: list 

503 ''' 

504 

505 assert is_valid_bounding_box((west, east, south, north)) 

506 

507 rp = [] 

508 for p in self.polygons: 

509 if bounding_boxes_overlap( 

510 p.get_bounding_box(), (west, east, south, north)): 

511 

512 rp.append(p) 

513 return rp 

514 

515 

516class Coastlines(GSHHGBase): 

517 

518 def is_point_on_land(self, lat, lon): 

519 ''' 

520 Check whether a point is on land. 

521 

522 Lakes are considered not land. 

523 

524 :param lat: Latitude in [deg] 

525 :type lat: float 

526 :param lon: Longitude in [deg] 

527 :type lon: float 

528 

529 :rtype: bool 

530 ''' 

531 

532 relevant_polygons = self.get_polygons_at(lat, lon) 

533 relevant_polygons.sort() 

534 

535 land = False 

536 for p in relevant_polygons: 

537 if (p.is_land() or p.is_antarctic_grounding_line() or 

538 p.is_island_in_lake()): 

539 if p.contains_point((lat, lon)): 

540 land = True 

541 elif (p.is_lake() or p.is_antarctic_icefront() or 

542 p.is_pond_in_island_in_lake()): 

543 if p.contains_point((lat, lon)): 

544 land = False 

545 return land 

546 

547 def get_land_mask(self, points): 

548 ''' 

549 Check whether given points are on land. 

550 

551 Lakes are considered not land. 

552 

553 :param points: Array of (lat, lon) pairs, shape (N, 2). 

554 :type points: :class:`numpy.ndarray` 

555 :return: Boolean land mask 

556 :rtype: :class:`numpy.ndarray` of shape (N,) 

557 ''' 

558 

559 west, east, south, north = bounding_box_covering_points(points) 

560 

561 relevant_polygons = self.get_polygons_within(west, east, south, north) 

562 relevant_polygons.sort() 

563 

564 mask = num.zeros(points.shape[0], dtype=bool) 

565 for p in relevant_polygons: 

566 if (p.is_land() or p.is_antarctic_grounding_line() or 

567 p.is_island_in_lake()): 

568 land = p.contains_points(points) 

569 mask[land] = True 

570 elif p.is_lake() or p.is_pond_in_island_in_lake(): 

571 water = p.contains_points(points) 

572 mask[water] = False 

573 return mask 

574 

575 @classmethod 

576 def full(cls): 

577 ''' 

578 Return the full-resolution GSHHG database. 

579 ''' 

580 return cls(cls._get_database('gshhs_f.b')) 

581 

582 @classmethod 

583 def high(cls): 

584 ''' 

585 Return the high-resolution GSHHG database. 

586 ''' 

587 return cls(cls._get_database('gshhs_h.b')) 

588 

589 @classmethod 

590 def intermediate(cls): 

591 ''' 

592 Return the intermediate-resolution GSHHG database. 

593 ''' 

594 return cls(cls._get_database('gshhs_i.b')) 

595 

596 @classmethod 

597 def low(cls): 

598 ''' 

599 Return the low-resolution GSHHG database. 

600 ''' 

601 return cls(cls._get_database('gshhs_l.b')) 

602 

603 @classmethod 

604 def crude(cls): 

605 ''' 

606 Return the crude-resolution GSHHG database. 

607 ''' 

608 return cls(cls._get_database('gshhs_c.b')) 

609 

610 

611class GSHHG(Coastlines): 

612 pass 

613 

614 

615class Borders(GSHHGBase): 

616 

617 @classmethod 

618 def full(cls): 

619 ''' 

620 Return the full-resolution GSHHG database. 

621 ''' 

622 return cls(cls._get_database('wdb_borders_f.b')) 

623 

624 @classmethod 

625 def high(cls): 

626 ''' 

627 Return the high-resolution GSHHG database. 

628 ''' 

629 return cls(cls._get_database('wdb_borders_h.b')) 

630 

631 @classmethod 

632 def intermediate(cls): 

633 ''' 

634 Return the intermediate-resolution GSHHG database. 

635 ''' 

636 return cls(cls._get_database('wdb_borders_i.b')) 

637 

638 @classmethod 

639 def low(cls): 

640 ''' 

641 Return the low-resolution GSHHG database. 

642 ''' 

643 return cls(cls._get_database('wdb_borders_l.b')) 

644 

645 @classmethod 

646 def crude(cls): 

647 ''' 

648 Return the crude-resolution GSHHG database. 

649 ''' 

650 return cls(cls._get_database('wdb_borders_c.b')) 

651 

652 

653class Rivers(GSHHGBase): 

654 

655 @classmethod 

656 def full(cls): 

657 ''' 

658 Return the full-resolution GSHHG database. 

659 ''' 

660 return cls(cls._get_database('wdb_rivers_f.b')) 

661 

662 @classmethod 

663 def high(cls): 

664 ''' 

665 Return the high-resolution GSHHG database. 

666 ''' 

667 return cls(cls._get_database('wdb_rivers_h.b')) 

668 

669 @classmethod 

670 def intermediate(cls): 

671 ''' 

672 Return the intermediate-resolution GSHHG database. 

673 ''' 

674 return cls(cls._get_database('wdb_rivers_i.b')) 

675 

676 @classmethod 

677 def low(cls): 

678 ''' 

679 Return the low-resolution GSHHG database. 

680 ''' 

681 return cls(cls._get_database('wdb_rivers_l.b')) 

682 

683 @classmethod 

684 def crude(cls): 

685 ''' 

686 Return the crude-resolution GSHHG database. 

687 ''' 

688 return cls(cls._get_database('wdb_rivers_c.b'))