1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Interface to the GSHHG (coastlines, rivers and borders) database.
9The Global Self-consistent Hierarchical High-resolution Geography Database
10(GSHHG) is a collection of polygons representing land, lakes, rivers and
11political borders.
13If the database is not already available, it will be downloaded
14automatically on first use.
16For more information about GSHHG, see
17http://www.soest.hawaii.edu/pwessel/gshhg/.
19.. note::
21 **If you use this dataset, please cite:**
23 Wessel, P., and W. H. F.
24 Smith, A Global Self-consistent, Hierarchical, High-resolution
25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996.
26'''
28import logging
29import io
30import struct
31import time
32import numpy as num
34from os import path
36from pyrocko import config, orthodrome
37from .util import get_download_callback
40logger = logging.getLogger('pyrocko.dataset.gshhg')
41config = config.config()
43km = 1e3
44micro_deg = 1e-6
47def split_region_0_360(wesn):
48 west, east, south, north = wesn
49 if west < 0.:
50 if east <= 0:
51 return [(west+360., east+360., south, north)]
52 else:
53 return [(west+360., 360., south, north),
54 (0., east, south, north)]
55 else:
56 return [wesn]
59def is_valid_bounding_box(wesn):
60 '''
61 Check if a given bounding box meets the GSHHG conventions.
63 :param wesn: bounding box as (west, east, south, north) in [deg]
64 '''
66 w, e, s, n = wesn
68 return (
69 w <= e
70 and s <= n
71 and -90.0 <= s <= 90.
72 and -90. <= n <= 90.
73 and -180. <= w < 360.
74 and -180. <= e < 360.)
77def is_valid_polygon(points):
78 '''
79 Check if polygon points meet the GSHHG conventions.
81 :param points: Array of (lat, lon) pairs, shape (N, 2).
82 '''
84 lats = points[:, 0]
85 lons = points[:, 1]
87 return (
88 num.all(-90. <= lats)
89 and num.all(lats <= 90.)
90 and num.all(-180. <= lons)
91 and num.all(lons < 360.))
94def points_in_bounding_box(points, wesn, tolerance=0.1):
95 '''
96 Check which points are contained in a given bounding box.
98 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
99 :param wesn: Region tuple (west, east, south, north) [deg]
100 :param tolerance: increase the size of the test bounding box by
101 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
102 bounding box).
104 :returns: Bool array of shape (N,).
105 '''
106 points_wrap = points.copy()
107 points_wrap[:, 1] %= 360.
109 mask = num.zeros(points_wrap.shape[0], dtype=bool)
110 for w, e, s, n in split_region_0_360(wesn):
111 mask = num.logical_or(
112 mask,
113 num.logical_and(
114 num.logical_and(
115 w-tolerance <= points_wrap[:, 1],
116 points_wrap[:, 1] <= e+tolerance),
117 num.logical_and(
118 s-tolerance <= points_wrap[:, 0],
119 points_wrap[:, 0] <= n+tolerance)))
121 return mask
124def point_in_bounding_box(point, wesn, tolerance=0.1):
125 '''
126 Check whether point is contained in a given bounding box.
128 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
129 :param wesn: Region tuple (west, east, south, north) [deg]
130 :param tolerance: increase the size of the test bounding box by
131 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
132 bounding box).
134 :rtype: bool
135 '''
137 lat, lon = point
138 lon %= 360.
139 for w, e, s, n in split_region_0_360(wesn):
140 if (w-tolerance <= lon
141 and lon <= e+tolerance
142 and s-tolerance <= lat
143 and lat <= n+tolerance):
145 return True
147 return False
150def bounding_boxes_overlap(wesn1, wesn2):
151 '''
152 Check whether two bounding boxes intersect.
154 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg]
156 :rtype: bool
157 '''
158 for w1, e1, s1, n1 in split_region_0_360(wesn1):
159 for w2, e2, s2, n2 in split_region_0_360(wesn2):
160 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2:
161 return True
163 return False
166def is_polygon_in_bounding_box(points, wesn, tolerance=0.1):
167 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance))
170def bounding_box_covering_points(points):
171 lats = points[:, 0]
172 lat_min, lat_max = num.min(lats), num.max(lats)
174 lons = points[:, 1]
175 lons = lons % 360.
176 lon_min, lon_max = num.min(lons), num.max(lons)
177 if lon_max - lon_min < 180.:
178 return lon_min, lon_max, lat_min, lat_max
180 lons = (lons - 180.) % 360. - 180.
181 lon_min, lon_max = num.min(lons), num.max(lons)
182 if lon_max - lon_min < 180.:
183 return lon_min, lon_max, lat_min, lat_max
185 return (-180., 180., lat_min, lat_max)
188class Polygon(object):
189 '''
190 Representation of a GSHHG polygon.
191 '''
193 RIVER_NOT_SET = 0
195 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE',
196 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE']
198 SOURCE = ['CIA_WDBII', 'WVS', 'AC']
200 def __init__(self, gshhg_file, offset, *attr):
201 '''
202 Initialise a GSHHG polygon
204 :param gshhg_file: GSHHG binary file
205 :type gshhg_file: str
206 :param offset: This polygon's offset in binary file
207 :type offset: int
208 :param attr: Polygon attributes
209 ``(pid, npoints, _flag, west, east, south, north,
210 area, area_full, container, ancestor)``.
211 See :file:`gshhg.h` for details.
212 :type attr: tuple
213 '''
214 (self.pid, self.npoints, self._flag,
215 self.west, self.east, self.south, self.north,
216 self.area, self.area_full, self.container, self.ancestor) = attr
218 self.west *= micro_deg
219 self.east *= micro_deg
220 self.south *= micro_deg
221 self.north *= micro_deg
223 self.level_no = (self._flag & 255)
224 self.level = self.LEVELS[self.level_no - 1]
225 self.version = (self._flag >> 8) & 255
227 cross = (self._flag >> 16) & 3
228 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False
229 self.dateline_crossed = True if cross == 2 or cross == 3 else False
231 self.source = self.SOURCE[(self._flag >> 24) & 1]
232 if self.level_no >= 5:
233 self.source = self.SOURCE[2]
235 self.river = (self._flag >> 25) & 1
237 scale = 10.**(self._flag >> 26)
238 self.area /= scale
239 self.area_full /= scale
241 self._points = None
242 self._file = gshhg_file
243 self._offset = offset
245 @property
246 def points(self):
247 '''
248 Points of the polygon.
250 Array of (lat, lon) pairs, shape (N, 2).
252 :rtype: :class:`numpy.ndarray`
253 '''
254 if self._points is None:
255 with open(self._file) as db:
256 db.seek(self._offset)
257 self._points = num.fromfile(
258 db, dtype='>i4', count=self.npoints*2)\
259 .astype(num.float32)\
260 .reshape(self.npoints, 2)
262 self._points = num.fliplr(self._points)
263 if self.level_no in (2, 4):
264 self._points = self._points[::-1, :]
266 self._points *= micro_deg
267 return self._points
269 @property
270 def lats(self):
271 return self.points[:, 0]
273 @property
274 def lons(self):
275 return self.points[:, 1]
277 def _is_level(self, level):
278 if self.level is self.LEVELS[level]:
279 return True
280 return False
282 def is_land(self):
283 '''
284 Check if the polygon is land.
286 :rtype: bool
287 '''
288 return self._is_level(0)
290 def is_lake(self):
291 '''
292 Check if the polygon is a lake.
294 :rtype: bool
295 '''
296 return self._is_level(1)
298 def is_island_in_lake(self):
299 '''
300 Check if the polygon is an island in a lake.
302 :rtype: bool
303 '''
304 return self._is_level(2)
306 def is_pond_in_island_in_lake(self):
307 '''
308 Check if the polygon is pond on an island in a lake.
310 :rtype: bool
311 '''
312 return self._is_level(3)
314 def is_antarctic_icefront(self):
315 '''
316 Check if the polygon is antarctic icefront.
318 :rtype: bool
319 '''
320 return self._is_level(4)
322 def is_antarctic_grounding_line(self):
323 '''
324 Check if the polygon is antarctic grounding line.
326 :rtype: bool
327 '''
328 return self._is_level(5)
330 def contains_point(self, point):
331 '''
332 Check if point lies in polygon.
334 :param point: (lat, lon) [deg]
335 :type point: tuple
336 :rtype: bool
338 See :py:func:`pyrocko.orthodrome.contains_points`.
339 '''
340 return bool(
341 self.contains_points(num.asarray(point)[num.newaxis, :])[0])
343 def contains_points(self, points):
344 '''
345 Check if points lie in polygon.
347 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
348 :type points: :class:`numpy.ndarray`
350 See :py:func:`pyrocko.orthodrome.contains_points`.
352 :returns: Bool array of shape (N,)
353 '''
354 mask = points_in_bounding_box(points, self.get_bounding_box())
355 if num.any(mask):
356 mask[mask] = orthodrome.contains_points(
357 self.points, points[mask, :])
359 return mask
361 def get_bounding_box(self):
362 return (self.west, self.east, self.south, self.north)
364 def __lt__(self, polygon):
365 return self.level_no < polygon.level_no
367 def __str__(self):
368 rstr = '''Polygon id: {p.pid}
369-------------------
370Points: {p.npoints}
371Level: {p.level}
372Area: {p.area} km**2
373Area Full: {p.area_full} km**2
374Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N
375Source: {p.source}
376Greenwhich crossed: {p.greenwhich_crossed}
377Dateline crossed: {p.dateline_crossed}
378 '''.format(p=self)
379 return rstr
382class RiverPolygon(Polygon):
384 LEVELS = [
385 'DOUBLE_LINED_RIVERS',
386 'PERMANENT_MAJOR_RIVERS',
387 'ADDITIONAL_MAJOR_RIVERS',
388 'ADDITIONAL_RIVERS',
389 'MINOR_RIVERS',
390 'INTERMITTENT_RIVERS-MAJOR',
391 'INTERMITTENT_RIVERS-ADDITIONAL',
392 'INTERMITTENT_RIVERS-MINOR',
393 'MAJOR_CANALS',
394 'MINOR_CANALS',
395 'IRRIGATION_CANALS',
396 'DOCS_MISS',
397 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS']
400class GSHHGBase(object):
401 '''
402 GSHHG database access.
404 This class provides methods to select relevant polygons (land, lakes, etc.)
405 for given locations or regions. It also provides robust high-level
406 functions to test if the Earth is dry or wet at given coordinates.
407 '''
409 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa
410 _header_struct = struct.Struct('>IIIiiiiIIii')
412 def __init__(self, gshhg_file):
413 ''' Initialise the database from GSHHG binary.
415 :param gshhg_file: Path to file
416 :type gshhg_file: str
417 :
418 '''
419 t0 = time.time()
420 self._file = gshhg_file
422 self.polygons = []
423 self._read_database()
424 logger.debug('Initialised GSHHG database from %s in [%.4f s]'
425 % (gshhg_file, time.time()-t0))
427 def _read_database(self):
428 with open(self._file, mode='rb') as db:
429 while db:
430 buf = db.read(self._header_struct.size)
431 if not buf:
432 break
433 header = self._header_struct.unpack_from(buf)
434 dataset = path.basename(self._file).split("_")[1]
436 if dataset == 'rivers':
437 LoadPolygon = RiverPolygon
438 else:
439 LoadPolygon = Polygon
441 p = LoadPolygon(
442 self._file,
443 db.tell(),
444 *header)
445 self.polygons.append(p)
447 offset = 8 * header[1]
448 db.seek(offset, io.SEEK_CUR)
450 @classmethod
451 def _get_database(cls, filename):
452 file = path.join(config.gshhg_dir, filename)
453 if not path.exists(file):
454 from pyrocko import util
455 import zipfile
457 archive_path = path.join(config.gshhg_dir,
458 path.basename(cls.gshhg_url))
459 util.download_file(
460 cls.gshhg_url, archive_path,
461 status_callback=get_download_callback(
462 'Downloading GSHHG database...'))
463 if not zipfile.is_zipfile(archive_path):
464 raise util.DownloadError('GSHHG file is corrupted!')
465 logger.info('Unzipping GSHHG database...')
466 zipf = zipfile.ZipFile(archive_path)
467 zipf.extractall(config.gshhg_dir)
468 else:
469 logger.debug('Using cached %s' % filename)
470 return file
472 def get_polygons_at(self, lat, lon):
473 '''
474 Get all polygons whose bounding boxes contain point.
476 :param lat: Latitude in [deg]
477 :type lat: float
478 :param lon: Longitude in [deg]
479 :type lon: float
480 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
481 :rtype: list
482 '''
483 rp = []
484 for p in self.polygons:
485 if point_in_bounding_box((lat, lon), p.get_bounding_box()):
486 rp.append(p)
487 return rp
489 def get_polygons_within(self, west, east, south, north):
490 '''
491 Get all polygons whose bounding boxes intersect with a bounding box.
493 :param west: Western boundary in decimal degree
494 :type west: float
495 :param east: Eastern boundary in decimal degree
496 :type east: float
497 :param north: Northern boundary in decimal degree
498 :type north: float
499 :param south: Southern boundary in decimal degree
500 :type south: float
501 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
502 :rtype: list
503 '''
505 assert is_valid_bounding_box((west, east, south, north))
507 rp = []
508 for p in self.polygons:
509 if bounding_boxes_overlap(
510 p.get_bounding_box(), (west, east, south, north)):
512 rp.append(p)
513 return rp
516class Coastlines(GSHHGBase):
518 def is_point_on_land(self, lat, lon):
519 '''
520 Check whether a point is on land.
522 Lakes are considered not land.
524 :param lat: Latitude in [deg]
525 :type lat: float
526 :param lon: Longitude in [deg]
527 :type lon: float
529 :rtype: bool
530 '''
532 relevant_polygons = self.get_polygons_at(lat, lon)
533 relevant_polygons.sort()
535 land = False
536 for p in relevant_polygons:
537 if (p.is_land() or p.is_antarctic_grounding_line() or
538 p.is_island_in_lake()):
539 if p.contains_point((lat, lon)):
540 land = True
541 elif (p.is_lake() or p.is_antarctic_icefront() or
542 p.is_pond_in_island_in_lake()):
543 if p.contains_point((lat, lon)):
544 land = False
545 return land
547 def get_land_mask(self, points):
548 '''
549 Check whether given points are on land.
551 Lakes are considered not land.
553 :param points: Array of (lat, lon) pairs, shape (N, 2).
554 :type points: :class:`numpy.ndarray`
555 :return: Boolean land mask
556 :rtype: :class:`numpy.ndarray` of shape (N,)
557 '''
559 west, east, south, north = bounding_box_covering_points(points)
561 relevant_polygons = self.get_polygons_within(west, east, south, north)
562 relevant_polygons.sort()
564 mask = num.zeros(points.shape[0], dtype=bool)
565 for p in relevant_polygons:
566 if (p.is_land() or p.is_antarctic_grounding_line() or
567 p.is_island_in_lake()):
568 land = p.contains_points(points)
569 mask[land] = True
570 elif p.is_lake() or p.is_pond_in_island_in_lake():
571 water = p.contains_points(points)
572 mask[water] = False
573 return mask
575 @classmethod
576 def full(cls):
577 '''
578 Return the full-resolution GSHHG database.
579 '''
580 return cls(cls._get_database('gshhs_f.b'))
582 @classmethod
583 def high(cls):
584 '''
585 Return the high-resolution GSHHG database.
586 '''
587 return cls(cls._get_database('gshhs_h.b'))
589 @classmethod
590 def intermediate(cls):
591 '''
592 Return the intermediate-resolution GSHHG database.
593 '''
594 return cls(cls._get_database('gshhs_i.b'))
596 @classmethod
597 def low(cls):
598 '''
599 Return the low-resolution GSHHG database.
600 '''
601 return cls(cls._get_database('gshhs_l.b'))
603 @classmethod
604 def crude(cls):
605 '''
606 Return the crude-resolution GSHHG database.
607 '''
608 return cls(cls._get_database('gshhs_c.b'))
611class GSHHG(Coastlines):
612 pass
615class Borders(GSHHGBase):
617 @classmethod
618 def full(cls):
619 '''
620 Return the full-resolution GSHHG database.
621 '''
622 return cls(cls._get_database('wdb_borders_f.b'))
624 @classmethod
625 def high(cls):
626 '''
627 Return the high-resolution GSHHG database.
628 '''
629 return cls(cls._get_database('wdb_borders_h.b'))
631 @classmethod
632 def intermediate(cls):
633 '''
634 Return the intermediate-resolution GSHHG database.
635 '''
636 return cls(cls._get_database('wdb_borders_i.b'))
638 @classmethod
639 def low(cls):
640 '''
641 Return the low-resolution GSHHG database.
642 '''
643 return cls(cls._get_database('wdb_borders_l.b'))
645 @classmethod
646 def crude(cls):
647 '''
648 Return the crude-resolution GSHHG database.
649 '''
650 return cls(cls._get_database('wdb_borders_c.b'))
653class Rivers(GSHHGBase):
655 @classmethod
656 def full(cls):
657 '''
658 Return the full-resolution GSHHG database.
659 '''
660 return cls(cls._get_database('wdb_rivers_f.b'))
662 @classmethod
663 def high(cls):
664 '''
665 Return the high-resolution GSHHG database.
666 '''
667 return cls(cls._get_database('wdb_rivers_h.b'))
669 @classmethod
670 def intermediate(cls):
671 '''
672 Return the intermediate-resolution GSHHG database.
673 '''
674 return cls(cls._get_database('wdb_rivers_i.b'))
676 @classmethod
677 def low(cls):
678 '''
679 Return the low-resolution GSHHG database.
680 '''
681 return cls(cls._get_database('wdb_rivers_l.b'))
683 @classmethod
684 def crude(cls):
685 '''
686 Return the crude-resolution GSHHG database.
687 '''
688 return cls(cls._get_database('wdb_rivers_c.b'))