1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Interface to the GSHHG (coastlines, rivers and borders) database. 

8 

9The Global Self-consistent Hierarchical High-resolution Geography Database 

10(GSHHG) is a collection of polygons representing land, lakes, rivers and 

11political borders. 

12 

13If the database is not already available, it will be downloaded 

14automatically on first use. 

15 

16For more information about GSHHG, see 

17http://www.soest.hawaii.edu/pwessel/gshhg/. 

18 

19.. note:: 

20 

21 **If you use this dataset, please cite:** 

22 

23 Wessel, P., and W. H. F. 

24 Smith, A Global Self-consistent, Hierarchical, High-resolution 

25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996. 

26''' 

27 

28import logging 

29import io 

30import struct 

31import time 

32import numpy as num 

33 

34from os import path 

35 

36from pyrocko import config, orthodrome 

37from .util import get_download_callback 

38 

39 

40logger = logging.getLogger('pyrocko.dataset.gshhg') 

41config = config.config() 

42 

43km = 1e3 

44micro_deg = 1e-6 

45 

46 

47def split_region_0_360(wesn): 

48 west, east, south, north = wesn 

49 if west < 0.: 

50 if east <= 0: 

51 return [(west+360., east+360., south, north)] 

52 else: 

53 return [(west+360., 360., south, north), 

54 (0., east, south, north)] 

55 else: 

56 return [wesn] 

57 

58 

59def is_valid_bounding_box(wesn): 

60 ''' 

61 Check if a given bounding box meets the GSHHG conventions. 

62 

63 :param wesn: bounding box as (west, east, south, north) in [deg] 

64 ''' 

65 

66 w, e, s, n = wesn 

67 

68 return ( 

69 w <= e 

70 and s <= n 

71 and -90.0 <= s <= 90. 

72 and -90. <= n <= 90. 

73 and -180. <= w < 360. 

74 and -180. <= e < 360.) 

75 

76 

77def is_valid_polygon(points): 

78 ''' 

79 Check if polygon points meet the GSHHG conventions. 

80 

81 :param points: Array of (lat, lon) pairs, shape (N, 2). 

82 ''' 

83 

84 lats = points[:, 0] 

85 lons = points[:, 1] 

86 

87 return ( 

88 num.all(-90. <= lats) 

89 and num.all(lats <= 90.) 

90 and num.all(-180. <= lons) 

91 and num.all(lons < 360.)) 

92 

93 

94def points_in_bounding_box(points, wesn, tolerance=0.1): 

95 ''' 

96 Check which points are contained in a given bounding box. 

97 

98 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

99 :param wesn: Region tuple (west, east, south, north) [deg] 

100 :param tolerance: increase the size of the test bounding box by 

101 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

102 bounding box). 

103 

104 :returns: Bool array of shape (N,). 

105 ''' 

106 points_wrap = points.copy() 

107 points_wrap[:, 1] %= 360. 

108 

109 mask = num.zeros(points_wrap.shape[0], dtype=bool) 

110 for w, e, s, n in split_region_0_360(wesn): 

111 mask = num.logical_or( 

112 mask, 

113 num.logical_and( 

114 num.logical_and( 

115 w-tolerance <= points_wrap[:, 1], 

116 points_wrap[:, 1] <= e+tolerance), 

117 num.logical_and( 

118 s-tolerance <= points_wrap[:, 0], 

119 points_wrap[:, 0] <= n+tolerance))) 

120 

121 return mask 

122 

123 

124def point_in_bounding_box(point, wesn, tolerance=0.1): 

125 ''' 

126 Check whether point is contained in a given bounding box. 

127 

128 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

129 :param wesn: Region tuple (west, east, south, north) [deg] 

130 :param tolerance: increase the size of the test bounding box by 

131 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight 

132 bounding box). 

133 

134 :rtype: bool 

135 ''' 

136 

137 lat, lon = point 

138 lon %= 360. 

139 for w, e, s, n in split_region_0_360(wesn): 

140 if (w-tolerance <= lon 

141 and lon <= e+tolerance 

142 and s-tolerance <= lat 

143 and lat <= n+tolerance): 

144 

145 return True 

146 

147 return False 

148 

149 

150def bounding_boxes_overlap(wesn1, wesn2): 

151 ''' 

152 Check whether two bounding boxes intersect. 

153 

154 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg] 

155 

156 :rtype: bool 

157 ''' 

158 for w1, e1, s1, n1 in split_region_0_360(wesn1): 

159 for w2, e2, s2, n2 in split_region_0_360(wesn2): 

160 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2: 

161 return True 

162 

163 return False 

164 

165 

166def is_polygon_in_bounding_box(points, wesn, tolerance=0.1): 

167 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance)) 

168 

169 

170def bounding_box_covering_points(points): 

171 lats = points[:, 0] 

172 lat_min, lat_max = num.min(lats), num.max(lats) 

173 

174 lons = points[:, 1] 

175 lons = lons % 360. 

176 lon_min, lon_max = num.min(lons), num.max(lons) 

177 if lon_max - lon_min < 180.: 

178 return lon_min, lon_max, lat_min, lat_max 

179 

180 lons = (lons - 180.) % 360. - 180. 

181 lon_min, lon_max = num.min(lons), num.max(lons) 

182 if lon_max - lon_min < 180.: 

183 return lon_min, lon_max, lat_min, lat_max 

184 

185 return (-180., 180., lat_min, lat_max) 

186 

187 

188class Polygon(object): 

189 ''' 

190 Representation of a GSHHG polygon. 

191 ''' 

192 

193 RIVER_NOT_SET = 0 

194 

195 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE', 

196 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE'] 

197 

198 SOURCE = ['CIA_WDBII', 'WVS', 'AC'] 

199 

200 def __init__(self, gshhg_file, offset, *attr): 

201 ''' 

202 Initialise a GSHHG polygon 

203 

204 :param gshhg_file: GSHHG binary file 

205 :type gshhg_file: str 

206 :param offset: This polygon's offset in binary file 

207 :type offset: int 

208 :param attr: Polygon attributes 

209 ``(pid, npoints, _flag, west, east, south, north, 

210 area, area_full, container, ancestor)``. 

211 See :file:`gshhg.h` for details. 

212 :type attr: tuple 

213 ''' 

214 (self.pid, self.npoints, self._flag, 

215 self.west, self.east, self.south, self.north, 

216 self.area, self.area_full, self.container, self.ancestor) = attr 

217 

218 self.west *= micro_deg 

219 self.east *= micro_deg 

220 self.south *= micro_deg 

221 self.north *= micro_deg 

222 

223 self.level_no = (self._flag & 255) 

224 print(self.level_no, self.LEVELS) 

225 self.level = self.LEVELS[self.level_no - 1] 

226 self.version = (self._flag >> 8) & 255 

227 

228 cross = (self._flag >> 16) & 3 

229 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False 

230 self.dateline_crossed = True if cross == 2 or cross == 3 else False 

231 

232 self.source = self.SOURCE[(self._flag >> 24) & 1] 

233 if self.level_no >= 5: 

234 self.source = self.SOURCE[2] 

235 

236 self.river = (self._flag >> 25) & 1 

237 

238 scale = 10.**(self._flag >> 26) 

239 self.area /= scale 

240 self.area_full /= scale 

241 

242 self._points = None 

243 self._file = gshhg_file 

244 self._offset = offset 

245 

246 @property 

247 def points(self): 

248 ''' 

249 Points of the polygon. 

250 

251 Array of (lat, lon) pairs, shape (N, 2). 

252 

253 :rtype: :class:`numpy.ndarray` 

254 ''' 

255 if self._points is None: 

256 with open(self._file) as db: 

257 db.seek(self._offset) 

258 self._points = num.fromfile( 

259 db, dtype='>i4', count=self.npoints*2)\ 

260 .astype(num.float32)\ 

261 .reshape(self.npoints, 2) 

262 

263 self._points = num.fliplr(self._points) 

264 if self.level_no in (2, 4): 

265 self._points = self._points[::-1, :] 

266 

267 self._points *= micro_deg 

268 return self._points 

269 

270 @property 

271 def lats(self): 

272 return self.points[:, 0] 

273 

274 @property 

275 def lons(self): 

276 return self.points[:, 1] 

277 

278 def _is_level(self, level): 

279 if self.level is self.LEVELS[level]: 

280 return True 

281 return False 

282 

283 def is_land(self): 

284 ''' 

285 Check if the polygon is land. 

286 

287 :rtype: bool 

288 ''' 

289 return self._is_level(0) 

290 

291 def is_lake(self): 

292 ''' 

293 Check if the polygon is a lake. 

294 

295 :rtype: bool 

296 ''' 

297 return self._is_level(1) 

298 

299 def is_island_in_lake(self): 

300 ''' 

301 Check if the polygon is an island in a lake. 

302 

303 :rtype: bool 

304 ''' 

305 return self._is_level(2) 

306 

307 def is_pond_in_island_in_lake(self): 

308 ''' 

309 Check if the polygon is pond on an island in a lake. 

310 

311 :rtype: bool 

312 ''' 

313 return self._is_level(3) 

314 

315 def is_antarctic_icefront(self): 

316 ''' 

317 Check if the polygon is antarctic icefront. 

318 

319 :rtype: bool 

320 ''' 

321 return self._is_level(4) 

322 

323 def is_antarctic_grounding_line(self): 

324 ''' 

325 Check if the polygon is antarctic grounding line. 

326 

327 :rtype: bool 

328 ''' 

329 return self._is_level(5) 

330 

331 def contains_point(self, point): 

332 ''' 

333 Check if point lies in polygon. 

334 

335 :param point: (lat, lon) [deg] 

336 :type point: tuple 

337 :rtype: bool 

338 

339 See :py:func:`pyrocko.orthodrome.contains_points`. 

340 ''' 

341 return bool( 

342 self.contains_points(num.asarray(point)[num.newaxis, :])[0]) 

343 

344 def contains_points(self, points): 

345 ''' 

346 Check if points lie in polygon. 

347 

348 :param points: Array of (lat lon) pairs, shape (N, 2) [deg]. 

349 :type points: :class:`numpy.ndarray` 

350 

351 See :py:func:`pyrocko.orthodrome.contains_points`. 

352 

353 :returns: Bool array of shape (N,) 

354 ''' 

355 mask = points_in_bounding_box(points, self.get_bounding_box()) 

356 if num.any(mask): 

357 mask[mask] = orthodrome.contains_points( 

358 self.points, points[mask, :]) 

359 

360 return mask 

361 

362 def get_bounding_box(self): 

363 return (self.west, self.east, self.south, self.north) 

364 

365 def __lt__(self, polygon): 

366 return self.level_no < polygon.level_no 

367 

368 def __str__(self): 

369 rstr = '''Polygon id: {p.pid} 

370------------------- 

371Points: {p.npoints} 

372Level: {p.level} 

373Area: {p.area} km**2 

374Area Full: {p.area_full} km**2 

375Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N 

376Source: {p.source} 

377Greenwhich crossed: {p.greenwhich_crossed} 

378Dateline crossed: {p.dateline_crossed} 

379 '''.format(p=self) 

380 return rstr 

381 

382 

383class RiverPolygon(Polygon): 

384 

385 LEVELS = [ 

386 'DOUBLE_LINED_RIVERS', 

387 'PERMANENT_MAJOR_RIVERS', 

388 'ADDITIONAL_MAJOR_RIVERS', 

389 'ADDITIONAL_RIVERS', 

390 'MINOR_RIVERS', 

391 'INTERMITTENT_RIVERS-MAJOR', 

392 'INTERMITTENT_RIVERS-ADDITIONAL', 

393 'INTERMITTENT_RIVERS-MINOR', 

394 'MAJOR_CANALS', 

395 'MINOR_CANALS', 

396 'IRRIGATION_CANALS', 

397 'DOCS_MISS', 

398 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS'] 

399 

400 

401class GSHHGBase(object): 

402 ''' 

403 GSHHG database access. 

404 

405 This class provides methods to select relevant polygons (land, lakes, etc.) 

406 for given locations or regions. It also provides robust high-level 

407 functions to test if the Earth is dry or wet at given coordinates. 

408 ''' 

409 

410 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa 

411 _header_struct = struct.Struct('>IIIiiiiIIii') 

412 

413 def __init__(self, gshhg_file): 

414 ''' Initialise the database from GSHHG binary. 

415 

416 :param gshhg_file: Path to file 

417 :type gshhg_file: str 

418 : 

419 ''' 

420 t0 = time.time() 

421 self._file = gshhg_file 

422 

423 self.polygons = [] 

424 self._read_database() 

425 logger.debug('Initialised GSHHG database from %s in [%.4f s]' 

426 % (gshhg_file, time.time()-t0)) 

427 

428 def _read_database(self): 

429 with open(self._file, mode='rb') as db: 

430 while db: 

431 buf = db.read(self._header_struct.size) 

432 if not buf: 

433 break 

434 header = self._header_struct.unpack_from(buf) 

435 dataset = path.basename(self._file).split("_")[1] 

436 print(dataset) 

437 if dataset == 'rivers': 

438 LoadPolygon = RiverPolygon 

439 else: 

440 LoadPolygon = Polygon 

441 

442 p = LoadPolygon( 

443 self._file, 

444 db.tell(), 

445 *header) 

446 self.polygons.append(p) 

447 

448 offset = 8 * header[1] 

449 db.seek(offset, io.SEEK_CUR) 

450 

451 @classmethod 

452 def _get_database(cls, filename): 

453 file = path.join(config.gshhg_dir, filename) 

454 if not path.exists(file): 

455 from pyrocko import util 

456 import zipfile 

457 

458 archive_path = path.join(config.gshhg_dir, 

459 path.basename(cls.gshhg_url)) 

460 util.download_file( 

461 cls.gshhg_url, archive_path, 

462 status_callback=get_download_callback( 

463 'Downloading GSHHG database...')) 

464 if not zipfile.is_zipfile(archive_path): 

465 raise util.DownloadError('GSHHG file is corrupted!') 

466 logger.info('Unzipping GSHHG database...') 

467 zipf = zipfile.ZipFile(archive_path) 

468 zipf.extractall(config.gshhg_dir) 

469 else: 

470 logger.debug('Using cached %s' % filename) 

471 return file 

472 

473 def get_polygons_at(self, lat, lon): 

474 ''' 

475 Get all polygons whose bounding boxes contain point. 

476 

477 :param lat: Latitude in [deg] 

478 :type lat: float 

479 :param lon: Longitude in [deg] 

480 :type lon: float 

481 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

482 :rtype: list 

483 ''' 

484 rp = [] 

485 for p in self.polygons: 

486 if point_in_bounding_box((lat, lon), p.get_bounding_box()): 

487 rp.append(p) 

488 return rp 

489 

490 def get_polygons_within(self, west, east, south, north): 

491 ''' 

492 Get all polygons whose bounding boxes intersect with a bounding box. 

493 

494 :param west: Western boundary in decimal degree 

495 :type west: float 

496 :param east: Eastern boundary in decimal degree 

497 :type east: float 

498 :param north: Northern boundary in decimal degree 

499 :type north: float 

500 :param south: Southern boundary in decimal degree 

501 :type south: float 

502 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon` 

503 :rtype: list 

504 ''' 

505 

506 assert is_valid_bounding_box((west, east, south, north)) 

507 

508 rp = [] 

509 for p in self.polygons: 

510 if bounding_boxes_overlap( 

511 p.get_bounding_box(), (west, east, south, north)): 

512 

513 rp.append(p) 

514 return rp 

515 

516 

517class Coastlines(GSHHGBase): 

518 

519 def is_point_on_land(self, lat, lon): 

520 ''' 

521 Check whether a point is on land. 

522 

523 Lakes are considered not land. 

524 

525 :param lat: Latitude in [deg] 

526 :type lat: float 

527 :param lon: Longitude in [deg] 

528 :type lon: float 

529 

530 :rtype: bool 

531 ''' 

532 

533 relevant_polygons = self.get_polygons_at(lat, lon) 

534 relevant_polygons.sort() 

535 

536 land = False 

537 for p in relevant_polygons: 

538 if (p.is_land() or p.is_antarctic_grounding_line() or 

539 p.is_island_in_lake()): 

540 if p.contains_point((lat, lon)): 

541 land = True 

542 elif (p.is_lake() or p.is_antarctic_icefront() or 

543 p.is_pond_in_island_in_lake()): 

544 if p.contains_point((lat, lon)): 

545 land = False 

546 return land 

547 

548 def get_land_mask(self, points): 

549 ''' 

550 Check whether given points are on land. 

551 

552 Lakes are considered not land. 

553 

554 :param points: Array of (lat, lon) pairs, shape (N, 2). 

555 :type points: :class:`numpy.ndarray` 

556 :return: Boolean land mask 

557 :rtype: :class:`numpy.ndarray` of shape (N,) 

558 ''' 

559 

560 west, east, south, north = bounding_box_covering_points(points) 

561 

562 relevant_polygons = self.get_polygons_within(west, east, south, north) 

563 relevant_polygons.sort() 

564 

565 mask = num.zeros(points.shape[0], dtype=bool) 

566 for p in relevant_polygons: 

567 if (p.is_land() or p.is_antarctic_grounding_line() or 

568 p.is_island_in_lake()): 

569 land = p.contains_points(points) 

570 mask[land] = True 

571 elif p.is_lake() or p.is_pond_in_island_in_lake(): 

572 water = p.contains_points(points) 

573 mask[water] = False 

574 return mask 

575 

576 @classmethod 

577 def full(cls): 

578 ''' 

579 Return the full-resolution GSHHG database. 

580 ''' 

581 return cls(cls._get_database('gshhs_f.b')) 

582 

583 @classmethod 

584 def high(cls): 

585 ''' 

586 Return the high-resolution GSHHG database. 

587 ''' 

588 return cls(cls._get_database('gshhs_h.b')) 

589 

590 @classmethod 

591 def intermediate(cls): 

592 ''' 

593 Return the intermediate-resolution GSHHG database. 

594 ''' 

595 return cls(cls._get_database('gshhs_i.b')) 

596 

597 @classmethod 

598 def low(cls): 

599 ''' 

600 Return the low-resolution GSHHG database. 

601 ''' 

602 return cls(cls._get_database('gshhs_l.b')) 

603 

604 @classmethod 

605 def crude(cls): 

606 ''' 

607 Return the crude-resolution GSHHG database. 

608 ''' 

609 return cls(cls._get_database('gshhs_c.b')) 

610 

611 

612class GSHHG(Coastlines): 

613 pass 

614 

615 

616class Borders(GSHHGBase): 

617 

618 @classmethod 

619 def full(cls): 

620 ''' 

621 Return the full-resolution GSHHG database. 

622 ''' 

623 return cls(cls._get_database('wdb_borders_f.b')) 

624 

625 @classmethod 

626 def high(cls): 

627 ''' 

628 Return the high-resolution GSHHG database. 

629 ''' 

630 return cls(cls._get_database('wdb_borders_h.b')) 

631 

632 @classmethod 

633 def intermediate(cls): 

634 ''' 

635 Return the intermediate-resolution GSHHG database. 

636 ''' 

637 return cls(cls._get_database('wdb_borders_.b')) 

638 

639 @classmethod 

640 def low(cls): 

641 ''' 

642 Return the low-resolution GSHHG database. 

643 ''' 

644 return cls(cls._get_database('wdb_borders_l.b')) 

645 

646 @classmethod 

647 def crude(cls): 

648 ''' 

649 Return the crude-resolution GSHHG database. 

650 ''' 

651 return cls(cls._get_database('wdb_borders_c.b')) 

652 

653 

654class Rivers(GSHHGBase): 

655 

656 @classmethod 

657 def full(cls): 

658 ''' 

659 Return the full-resolution GSHHG database. 

660 ''' 

661 return cls(cls._get_database('wdb_rivers_f.b')) 

662 

663 @classmethod 

664 def high(cls): 

665 ''' 

666 Return the high-resolution GSHHG database. 

667 ''' 

668 return cls(cls._get_database('wdb_rivers_h.b')) 

669 

670 @classmethod 

671 def intermediate(cls): 

672 ''' 

673 Return the intermediate-resolution GSHHG database. 

674 ''' 

675 return cls(cls._get_database('wdb_rivers_.b')) 

676 

677 @classmethod 

678 def low(cls): 

679 ''' 

680 Return the low-resolution GSHHG database. 

681 ''' 

682 return cls(cls._get_database('wdb_rivers_l.b')) 

683 

684 @classmethod 

685 def crude(cls): 

686 ''' 

687 Return the crude-resolution GSHHG database. 

688 ''' 

689 return cls(cls._get_database('wdb_rivers_c.b'))