Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/dataset/gshhg.py: 95%
269 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-01-15 12:05 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-01-15 12:05 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Interface to the `GSHHG <https://www.soest.hawaii.edu/pwessel/gshhg/>`_
8(coastlines, rivers and borders) database.
10The Global Self-consistent Hierarchical High-resolution Geography Database
11(GSHHG) is a collection of polygons representing land, lakes, rivers and
12political borders.
14If the database is not already available, it will be downloaded
15automatically on first use.
17For more information about GSHHG, see
18http://www.soest.hawaii.edu/pwessel/gshhg/.
20.. rubric:: Citation
22If you use this dataset, please cite
24Wessel, P., and W. H. F. Smith, A Global Self-consistent, Hierarchical,
25High-resolution Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743,
261996.
28'''
30import logging
31import io
32import struct
33import time
34import numpy as num
36from os import path
38from pyrocko import config, orthodrome
39from .util import get_download_callback
42logger = logging.getLogger('pyrocko.dataset.gshhg')
43config = config.config()
45km = 1e3
46micro_deg = 1e-6
49def split_region_0_360(wesn):
50 west, east, south, north = wesn
51 if west < 0.:
52 if east <= 0:
53 return [(west+360., east+360., south, north)]
54 else:
55 return [(west+360., 360., south, north),
56 (0., east, south, north)]
57 else:
58 return [wesn]
61def is_valid_bounding_box(wesn):
62 '''
63 Check if a given bounding box meets the GSHHG conventions.
65 :param wesn: bounding box as (west, east, south, north) in [deg]
66 '''
68 w, e, s, n = wesn
70 return (
71 w <= e
72 and s <= n
73 and -90.0 <= s <= 90.
74 and -90. <= n <= 90.
75 and -180. <= w < 360.
76 and -180. <= e < 360.)
79def is_valid_polygon(points):
80 '''
81 Check if polygon points meet the GSHHG conventions.
83 :param points: Array of (lat, lon) pairs, shape (N, 2).
84 '''
86 lats = points[:, 0]
87 lons = points[:, 1]
89 return (
90 num.all(-90. <= lats)
91 and num.all(lats <= 90.)
92 and num.all(-180. <= lons)
93 and num.all(lons < 360.))
96def points_in_bounding_box(points, wesn, tolerance=0.1):
97 '''
98 Check which points are contained in a given bounding box.
100 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
101 :param wesn: Region tuple (west, east, south, north) [deg]
102 :param tolerance: increase the size of the test bounding box by
103 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
104 bounding box).
106 :returns: Bool array of shape (N,).
107 '''
108 points_wrap = points.copy()
109 points_wrap[:, 1] %= 360.
111 mask = num.zeros(points_wrap.shape[0], dtype=bool)
112 for w, e, s, n in split_region_0_360(wesn):
113 mask = num.logical_or(
114 mask,
115 num.logical_and(
116 num.logical_and(
117 w-tolerance <= points_wrap[:, 1],
118 points_wrap[:, 1] <= e+tolerance),
119 num.logical_and(
120 s-tolerance <= points_wrap[:, 0],
121 points_wrap[:, 0] <= n+tolerance)))
123 return mask
126def point_in_bounding_box(point, wesn, tolerance=0.1):
127 '''
128 Check whether point is contained in a given bounding box.
130 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
131 :param wesn: Region tuple (west, east, south, north) [deg]
132 :param tolerance: increase the size of the test bounding box by
133 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
134 bounding box).
136 :rtype: bool
137 '''
139 lat, lon = point
140 lon %= 360.
141 for w, e, s, n in split_region_0_360(wesn):
142 if (w-tolerance <= lon
143 and lon <= e+tolerance
144 and s-tolerance <= lat
145 and lat <= n+tolerance):
147 return True
149 return False
152def bounding_boxes_overlap(wesn1, wesn2):
153 '''
154 Check whether two bounding boxes intersect.
156 :param wesn1:
157 Region tuple ``(west, east, south, north)`` [deg]
158 :type wesn1:
159 :py:class:`tuple` of 4 :py:class:`float`
161 :param wesn2:
162 Region tuple ``(west, east, south, north)`` [deg]
163 :type wesn2:
164 :py:class:`tuple` of 4 :py:class:`float`
166 :rtype: bool
167 '''
168 for w1, e1, s1, n1 in split_region_0_360(wesn1):
169 for w2, e2, s2, n2 in split_region_0_360(wesn2):
170 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2:
171 return True
173 return False
176def is_polygon_in_bounding_box(points, wesn, tolerance=0.1):
177 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance))
180def bounding_box_covering_points(points):
181 lats = points[:, 0]
182 lat_min, lat_max = num.min(lats), num.max(lats)
184 lons = points[:, 1]
185 lons = lons % 360.
186 lon_min, lon_max = num.min(lons), num.max(lons)
187 if lon_max - lon_min < 180.:
188 return lon_min, lon_max, lat_min, lat_max
190 lons = (lons - 180.) % 360. - 180.
191 lon_min, lon_max = num.min(lons), num.max(lons)
192 if lon_max - lon_min < 180.:
193 return lon_min, lon_max, lat_min, lat_max
195 return (-180., 180., lat_min, lat_max)
198LEVELS = {
199 'rivers': [
200 'DOUBLE_LINED_RIVERS',
201 'PERMANENT_MAJOR_RIVERS',
202 'ADDITIONAL_MAJOR_RIVERS',
203 'ADDITIONAL_RIVERS',
204 'MINOR_RIVERS',
205 'INTERMITTENT_RIVERS-MAJOR',
206 'INTERMITTENT_RIVERS-ADDITIONAL',
207 'INTERMITTENT_RIVERS-MINOR',
208 'MAJOR_CANALS',
209 'MINOR_CANALS',
210 'IRRIGATION_CANALS'],
211 'coastlines': [
212 'LAND',
213 'LAKE',
214 'ISLAND_IN_LAKE',
215 'POND_IN_ISLAND_IN_LAKE',
216 'ANTARCTIC_ICE_FRONT',
217 'ANTARCTIC_GROUNDING_LINE'],
218 'borders': [
219 'NATIONAL_BOUNDARIES',
220 'STATE_BOUNDARIES_WITHIN_THE_AMERICAS',
221 'MARINE_BOUNDARIES']}
224class Polygon(object):
225 '''
226 Representation of a GSHHG polygon.
227 '''
229 RIVER_NOT_SET = 0
231 SOURCE = ['CIA_WDBII', 'WVS', 'AC']
233 def __init__(self, data_type, gshhg_file, offset, *attr):
234 '''
235 Initialise a GSHHG polygon
237 :param gshhg_file:
238 GSHHG binary file
239 :type gshhg_file:
240 file handle
242 :param offset:
243 This polygon's offset in binary file
244 :type offset:
245 int
247 :param attr:
248 Polygon attributes
249 ``(pid, npoints, _flag, west, east, south, north,
250 area, area_full, container, ancestor)``.
251 See :file:`gshhg.h` for details.
252 :type attr:
253 tuple
254 '''
255 self.data_type = data_type
256 (self.pid, self.npoints, self._flag,
257 self.west, self.east, self.south, self.north,
258 self.area, self.area_full, self.container, self.ancestor) = attr
260 self.west *= micro_deg
261 self.east *= micro_deg
262 self.south *= micro_deg
263 self.north *= micro_deg
265 self.level_no = (self._flag & 255)
266 if self.data_type == 'rivers':
267 # there seems to be a bug in the current version of GSHHG
268 self.level_no = [
269 0, 1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, 10][self.level_no]
270 else:
271 self.level_no -= 1
273 self.version = (self._flag >> 8) & 255
275 cross = (self._flag >> 16) & 3
276 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False
277 self.dateline_crossed = True if cross == 2 or cross == 3 else False
279 self.source = self.SOURCE[(self._flag >> 24) & 1]
280 if self.level_no >= 5:
281 self.source = self.SOURCE[2]
283 self.river = (self._flag >> 25) & 1
285 scale = 10.**(self._flag >> 26)
286 self.area /= scale
287 self.area_full /= scale
289 self._points = None
290 self._file = gshhg_file
291 self._offset = offset
293 @property
294 def level(self):
295 return LEVELS[self.data_type][self.level_no]
297 @property
298 def points(self):
299 '''
300 Points of the polygon.
302 Array of (lat, lon) pairs, shape (N, 2).
304 :rtype: :class:`numpy.ndarray`
305 '''
306 if self._points is None:
307 with open(self._file) as db:
308 db.seek(self._offset)
309 self._points = num.fromfile(
310 db, dtype='>i4', count=self.npoints*2)\
311 .astype(num.float32)\
312 .reshape(self.npoints, 2)
314 self._points = num.fliplr(self._points)
315 if self.data_type == 'coastlines' and self.level_no in (1, 3):
316 self._points = self._points[::-1, :]
318 self._points *= micro_deg
320 return self._points
322 @property
323 def lats(self):
324 return self.points[:, 0]
326 @property
327 def lons(self):
328 return self.points[:, 1]
330 def is_land(self):
331 '''
332 Check if the polygon is land.
334 :rtype: bool
335 '''
336 return self.level_no == 0
338 def is_lake(self):
339 '''
340 Check if the polygon is a lake.
342 :rtype: bool
343 '''
344 return self.level_no == 1
346 def is_island_in_lake(self):
347 '''
348 Check if the polygon is an island in a lake.
350 :rtype: bool
351 '''
352 return self.level_no == 2
354 def is_pond_in_island_in_lake(self):
355 '''
356 Check if the polygon is pond on an island in a lake.
358 :rtype: bool
359 '''
360 return self.level_no == 3
362 def is_antarctic_icefront(self):
363 '''
364 Check if the polygon is antarctic icefront.
366 :rtype: bool
367 '''
368 return self.level_no == 4
370 def is_antarctic_grounding_line(self):
371 '''
372 Check if the polygon is antarctic grounding line.
374 :rtype: bool
375 '''
376 return self.level_no == 5
378 def contains_point(self, point):
379 '''
380 Check if point lies in polygon.
382 :param point: (lat, lon) [deg]
383 :type point: tuple
384 :rtype: bool
386 See :py:func:`pyrocko.orthodrome.contains_points`.
387 '''
388 return bool(
389 self.contains_points(num.asarray(point)[num.newaxis, :])[0])
391 def contains_points(self, points):
392 '''
393 Check if points lie in polygon.
395 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
396 :type points: :class:`numpy.ndarray`
398 See :py:func:`pyrocko.orthodrome.contains_points`.
400 :returns: Bool array of shape (N,)
401 '''
402 mask = points_in_bounding_box(points, self.get_bounding_box())
403 if num.any(mask):
404 mask[mask] = orthodrome.contains_points(
405 self.points, points[mask, :])
407 return mask
409 def get_bounding_box(self):
410 return (self.west, self.east, self.south, self.north)
412 def __lt__(self, polygon):
413 return self.level_no < polygon.level_no
415 def __str__(self):
416 rstr = '''Polygon id: {p.pid}
417-------------------
418Points: {p.npoints}
419Level: {p.level}
420Area: {p.area} km**2
421Area Full: {p.area_full} km**2
422Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N
423Source: {p.source}
424Greenwhich crossed: {p.greenwhich_crossed}
425Dateline crossed: {p.dateline_crossed}
426 '''.format(p=self)
427 return rstr
430class GSHHGBase(object):
431 '''
432 GSHHG database access.
434 This class provides methods to select relevant polygons (land, lakes, etc.)
435 for given locations or regions. It also provides robust high-level
436 functions to test if the Earth is dry or wet at given coordinates.
437 '''
439 data_type = None
441 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa
442 _header_struct = struct.Struct('>IIIiiiiIIii')
444 def __init__(self, gshhg_file):
445 ''' Initialise the database from GSHHG binary.
447 :param gshhg_file: Path to file
448 :type gshhg_file: str
449 :
450 '''
451 t0 = time.time()
452 self._file = gshhg_file
454 self.polygons = []
455 self._read_database()
456 logger.debug('Initialised GSHHG database from %s in [%.4f s]'
457 % (gshhg_file, time.time()-t0))
459 def _read_database(self):
460 with open(self._file, mode='rb') as db:
461 while db:
462 buf = db.read(self._header_struct.size)
463 if not buf:
464 break
465 header = self._header_struct.unpack_from(buf)
467 p = Polygon(
468 self.data_type,
469 self._file,
470 db.tell(),
471 *header)
473 self.polygons.append(p)
475 offset = 8 * header[1]
476 db.seek(offset, io.SEEK_CUR)
478 def load_all(self):
479 data = num.fromfile(self._file, dtype='>i4').astype(num.float32)
480 data *= micro_deg
481 i = 11
482 for polygon in self.polygons:
483 if self.data_type == 'coastlines' and polygon.level_no in (1, 3):
484 polygon._points = data[i+polygon.npoints*2-1:i-1:-1].reshape(
485 (polygon.npoints, 2))
486 else:
487 polygon._points = num.fliplr(
488 data[i:i+polygon.npoints*2].reshape((polygon.npoints, 2)))
490 i += polygon.npoints * 2 + 11
492 assert i - 11 == data.size
494 @classmethod
495 def _get_database(cls, filename):
496 file = path.join(config.gshhg_dir, filename)
497 if not path.exists(file):
498 from pyrocko import util
499 import zipfile
501 archive_path = path.join(config.gshhg_dir,
502 path.basename(cls.gshhg_url))
503 util.download_file(
504 cls.gshhg_url, archive_path,
505 status_callback=get_download_callback(
506 'Downloading GSHHG database...'))
507 if not zipfile.is_zipfile(archive_path):
508 raise util.DownloadError('GSHHG file is corrupted!')
509 logger.info('Unzipping GSHHG database...')
510 zipf = zipfile.ZipFile(archive_path)
511 zipf.extractall(config.gshhg_dir)
512 else:
513 logger.debug('Using cached %s' % filename)
514 return file
516 def get_polygons_at(self, lat, lon):
517 '''
518 Get all polygons whose bounding boxes contain point.
520 :param lat: Latitude in [deg]
521 :type lat: float
522 :param lon: Longitude in [deg]
523 :type lon: float
524 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
525 :rtype: list
526 '''
527 rp = []
528 for p in self.polygons:
529 if point_in_bounding_box((lat, lon), p.get_bounding_box()):
530 rp.append(p)
531 return rp
533 def get_polygons_within(self, west, east, south, north):
534 '''
535 Get all polygons whose bounding boxes intersect with a bounding box.
537 :param west: Western boundary in decimal degree
538 :type west: float
539 :param east: Eastern boundary in decimal degree
540 :type east: float
541 :param north: Northern boundary in decimal degree
542 :type north: float
543 :param south: Southern boundary in decimal degree
544 :type south: float
545 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
546 :rtype: list
547 '''
549 assert is_valid_bounding_box((west, east, south, north))
551 rp = []
552 for p in self.polygons:
553 if bounding_boxes_overlap(
554 p.get_bounding_box(), (west, east, south, north)):
556 rp.append(p)
557 return rp
560class Coastlines(GSHHGBase):
562 data_type = 'coastlines'
564 def is_point_on_land(self, lat, lon):
565 '''
566 Check whether a point is on land.
568 Lakes are considered not land.
570 :param lat: Latitude in [deg]
571 :type lat: float
572 :param lon: Longitude in [deg]
573 :type lon: float
575 :rtype: bool
576 '''
578 relevant_polygons = self.get_polygons_at(lat, lon)
579 relevant_polygons.sort()
581 land = False
582 for p in relevant_polygons:
583 if (p.is_land() or p.is_antarctic_grounding_line() or
584 p.is_island_in_lake()):
585 if p.contains_point((lat, lon)):
586 land = True
587 elif (p.is_lake() or p.is_antarctic_icefront() or
588 p.is_pond_in_island_in_lake()):
589 if p.contains_point((lat, lon)):
590 land = False
591 return land
593 def get_land_mask(self, points):
594 '''
595 Check whether given points are on land.
597 Lakes are considered not land.
599 :param points: Array of (lat, lon) pairs, shape (N, 2).
600 :type points: :class:`numpy.ndarray`
601 :return: Boolean land mask
602 :rtype: :class:`numpy.ndarray` of shape (N,)
603 '''
605 west, east, south, north = bounding_box_covering_points(points)
607 relevant_polygons = self.get_polygons_within(west, east, south, north)
608 relevant_polygons.sort()
610 mask = num.zeros(points.shape[0], dtype=bool)
611 for p in relevant_polygons:
612 if (p.is_land() or p.is_antarctic_grounding_line() or
613 p.is_island_in_lake()):
614 land = p.contains_points(points)
615 mask[land] = True
616 elif p.is_lake() or p.is_pond_in_island_in_lake():
617 water = p.contains_points(points)
618 mask[water] = False
619 return mask
621 @classmethod
622 def full(cls):
623 '''
624 Return the full-resolution GSHHG database.
625 '''
626 return cls(cls._get_database('gshhs_f.b'))
628 @classmethod
629 def high(cls):
630 '''
631 Return the high-resolution GSHHG database.
632 '''
633 return cls(cls._get_database('gshhs_h.b'))
635 @classmethod
636 def intermediate(cls):
637 '''
638 Return the intermediate-resolution GSHHG database.
639 '''
640 return cls(cls._get_database('gshhs_i.b'))
642 @classmethod
643 def low(cls):
644 '''
645 Return the low-resolution GSHHG database.
646 '''
647 return cls(cls._get_database('gshhs_l.b'))
649 @classmethod
650 def crude(cls):
651 '''
652 Return the crude-resolution GSHHG database.
653 '''
654 return cls(cls._get_database('gshhs_c.b'))
657GSHHG = Coastlines # backwards compatibility
660class Borders(GSHHGBase):
662 data_type = 'borders'
664 @classmethod
665 def full(cls):
666 '''
667 Return the full-resolution GSHHG database.
668 '''
669 return cls(cls._get_database('wdb_borders_f.b'))
671 @classmethod
672 def high(cls):
673 '''
674 Return the high-resolution GSHHG database.
675 '''
676 return cls(cls._get_database('wdb_borders_h.b'))
678 @classmethod
679 def intermediate(cls):
680 '''
681 Return the intermediate-resolution GSHHG database.
682 '''
683 return cls(cls._get_database('wdb_borders_i.b'))
685 @classmethod
686 def low(cls):
687 '''
688 Return the low-resolution GSHHG database.
689 '''
690 return cls(cls._get_database('wdb_borders_l.b'))
692 @classmethod
693 def crude(cls):
694 '''
695 Return the crude-resolution GSHHG database.
696 '''
697 return cls(cls._get_database('wdb_borders_c.b'))
700class Rivers(GSHHGBase):
702 data_type = 'rivers'
704 @classmethod
705 def full(cls):
706 '''
707 Return the full-resolution GSHHG database.
708 '''
709 return cls(cls._get_database('wdb_rivers_f.b'))
711 @classmethod
712 def high(cls):
713 '''
714 Return the high-resolution GSHHG database.
715 '''
716 return cls(cls._get_database('wdb_rivers_h.b'))
718 @classmethod
719 def intermediate(cls):
720 '''
721 Return the intermediate-resolution GSHHG database.
722 '''
723 return cls(cls._get_database('wdb_rivers_i.b'))
725 @classmethod
726 def low(cls):
727 '''
728 Return the low-resolution GSHHG database.
729 '''
730 return cls(cls._get_database('wdb_rivers_l.b'))
732 @classmethod
733 def crude(cls):
734 '''
735 Return the crude-resolution GSHHG database.
736 '''
737 return cls(cls._get_database('wdb_rivers_c.b'))